@midscene/computer 1.8.1-beta-20260513084557.0 → 1.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/cli.mjs +325 -374
- package/dist/es/index.mjs +379 -385
- package/dist/es/mcp-server.mjs +325 -374
- package/dist/lib/cli.js +323 -372
- package/dist/lib/index.js +381 -384
- package/dist/lib/mcp-server.js +324 -373
- package/dist/types/index.d.ts +18 -4
- package/dist/types/mcp-server.d.ts +9 -4
- package/package.json +3 -3
package/dist/es/mcp-server.mjs
CHANGED
|
@@ -6,14 +6,14 @@ import { existsSync } from "node:fs";
|
|
|
6
6
|
import { createRequire } from "node:module";
|
|
7
7
|
import { dirname, resolve as external_node_path_resolve } from "node:path";
|
|
8
8
|
import { fileURLToPath } from "node:url";
|
|
9
|
-
import {
|
|
10
|
-
import { actionHoverParamSchema, actionTapParamSchema, defineAction, defineActionClearInput, defineActionDoubleClick, defineActionDragAndDrop, defineActionHover, defineActionInput, defineActionKeyboardPress, defineActionRightClick, defineActionScroll, defineActionTap } from "@midscene/core/device";
|
|
9
|
+
import { defineAction, defineActionsFromInputPrimitives } from "@midscene/core/device";
|
|
11
10
|
import { sleep } from "@midscene/core/utils";
|
|
12
11
|
import { createImgBase64ByFormat } from "@midscene/shared/img";
|
|
13
12
|
import { getDebug } from "@midscene/shared/logger";
|
|
14
13
|
import screenshot_desktop from "screenshot-desktop";
|
|
15
14
|
import { once } from "node:events";
|
|
16
15
|
import { createInterface } from "node:readline";
|
|
16
|
+
import { z } from "@midscene/core";
|
|
17
17
|
import { BaseMidsceneTools } from "@midscene/shared/mcp/base-tools";
|
|
18
18
|
const debugXvfb = getDebug('computer:xvfb');
|
|
19
19
|
function checkXvfbInstalled() {
|
|
@@ -92,15 +92,6 @@ function _define_property(obj, key, value) {
|
|
|
92
92
|
else obj[key] = value;
|
|
93
93
|
return obj;
|
|
94
94
|
}
|
|
95
|
-
const computerInputParamSchema = z.object({
|
|
96
|
-
value: z.string().describe('The text to input'),
|
|
97
|
-
mode: z["enum"]([
|
|
98
|
-
'replace',
|
|
99
|
-
'clear',
|
|
100
|
-
'append'
|
|
101
|
-
]).default('replace').optional().describe('Input mode: replace, clear, or append'),
|
|
102
|
-
locate: getMidsceneLocationSchema().describe('The input field to be filled').optional()
|
|
103
|
-
});
|
|
104
95
|
const SMOOTH_MOVE_STEPS_TAP = 8;
|
|
105
96
|
const SMOOTH_MOVE_STEPS_MOUSE_MOVE = 10;
|
|
106
97
|
const SMOOTH_MOVE_DELAY_TAP = 8;
|
|
@@ -411,7 +402,7 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
|
|
|
411
402
|
}
|
|
412
403
|
async healthCheck() {
|
|
413
404
|
console.log('[HealthCheck] Starting health check...');
|
|
414
|
-
console.log("[HealthCheck] @midscene/computer v1.8.1-beta-20260513084557.0");
|
|
405
|
+
console.log("[HealthCheck] @midscene/computer v1.8.1");
|
|
415
406
|
console.log('[HealthCheck] Taking screenshot...');
|
|
416
407
|
const screenshotTimeout = 15000;
|
|
417
408
|
let timeoutId;
|
|
@@ -477,21 +468,38 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
|
|
|
477
468
|
debugDevice('Taking screenshot', {
|
|
478
469
|
displayId: this.displayId
|
|
479
470
|
});
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
471
|
+
const options = {
|
|
472
|
+
format: 'png'
|
|
473
|
+
};
|
|
474
|
+
if (void 0 !== this.displayId) if ('darwin' === process.platform) {
|
|
475
|
+
const screenIndex = Number(this.displayId);
|
|
476
|
+
if (!Number.isNaN(screenIndex)) options.screen = screenIndex;
|
|
477
|
+
} else options.screen = this.displayId;
|
|
478
|
+
debugDevice('Screenshot options', options);
|
|
479
|
+
const MAX_ATTEMPTS = 3;
|
|
480
|
+
const RETRY_DELAY_MS = 300;
|
|
481
|
+
let lastRawMessage = '';
|
|
482
|
+
for(let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++)try {
|
|
489
483
|
const buffer = await screenshot_desktop(options);
|
|
484
|
+
if (attempt > 1) debugDevice(`Screenshot succeeded on attempt ${attempt}`);
|
|
490
485
|
return createImgBase64ByFormat('png', buffer.toString('base64'));
|
|
491
486
|
} catch (error) {
|
|
492
|
-
|
|
493
|
-
|
|
487
|
+
lastRawMessage = error instanceof Error ? error.message : String(error);
|
|
488
|
+
const isMacTransient = 'darwin' === process.platform && /could not create image from display/i.test(lastRawMessage);
|
|
489
|
+
const willRetry = isMacTransient && attempt < MAX_ATTEMPTS;
|
|
490
|
+
debugDevice(`Screenshot attempt ${attempt} failed: ${lastRawMessage}${willRetry ? ' — retrying' : ''}`);
|
|
491
|
+
if (!willRetry) break;
|
|
492
|
+
await sleep(RETRY_DELAY_MS);
|
|
494
493
|
}
|
|
494
|
+
if ('darwin' === process.platform && /could not create image from display/i.test(lastRawMessage)) throw new Error(`Failed to take screenshot on macOS: the host process is missing Screen Recording permission, or the target display is locked/sleeping.
|
|
495
|
+
|
|
496
|
+
Please follow these steps:
|
|
497
|
+
1. Open System Settings > Privacy & Security > Screen Recording
|
|
498
|
+
2. Enable the application running this script (e.g., Terminal, iTerm2, VS Code, WebStorm, or Midscene Studio)
|
|
499
|
+
3. Fully quit and relaunch that application after granting permission — macOS only re-reads this permission on process launch.
|
|
500
|
+
|
|
501
|
+
Original error: ${lastRawMessage}`);
|
|
502
|
+
throw new Error(`Failed to take screenshot: ${lastRawMessage}`);
|
|
495
503
|
}
|
|
496
504
|
async size() {
|
|
497
505
|
node_assert(libnut, 'libnut not initialized');
|
|
@@ -537,228 +545,111 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
|
|
|
537
545
|
node_assert(libnut, 'libnut not initialized');
|
|
538
546
|
await this.typeViaClipboard(text);
|
|
539
547
|
}
|
|
548
|
+
async selectAllAndDelete() {
|
|
549
|
+
node_assert(libnut, 'libnut not initialized');
|
|
550
|
+
if (this.useAppleScript) {
|
|
551
|
+
sendKeyViaAppleScript('a', [
|
|
552
|
+
'command'
|
|
553
|
+
]);
|
|
554
|
+
await sleep(50);
|
|
555
|
+
sendKeyViaAppleScript('backspace', []);
|
|
556
|
+
return;
|
|
557
|
+
}
|
|
558
|
+
const modifier = 'darwin' === process.platform ? 'command' : 'control';
|
|
559
|
+
libnut.keyTap('a', [
|
|
560
|
+
modifier
|
|
561
|
+
]);
|
|
562
|
+
await sleep(50);
|
|
563
|
+
libnut.keyTap('backspace');
|
|
564
|
+
}
|
|
565
|
+
async pressKeyboardShortcut(keyName) {
|
|
566
|
+
node_assert(libnut, 'libnut not initialized');
|
|
567
|
+
const keys = keyName.split('+');
|
|
568
|
+
const modifiers = keys.slice(0, -1).map(normalizeKeyName);
|
|
569
|
+
const key = normalizePrimaryKey(keys[keys.length - 1]);
|
|
570
|
+
debugDevice('KeyboardPress', {
|
|
571
|
+
original: keyName,
|
|
572
|
+
key,
|
|
573
|
+
modifiers,
|
|
574
|
+
driver: this.useAppleScript ? "applescript" : 'libnut'
|
|
575
|
+
});
|
|
576
|
+
if (this.useAppleScript) sendKeyViaAppleScript(key, modifiers);
|
|
577
|
+
else if (modifiers.length > 0) libnut.keyTap(key, modifiers);
|
|
578
|
+
else libnut.keyTap(key);
|
|
579
|
+
}
|
|
580
|
+
async performScroll(param) {
|
|
581
|
+
node_assert(libnut, 'libnut not initialized');
|
|
582
|
+
if (param.locate) {
|
|
583
|
+
const element = param.locate;
|
|
584
|
+
const [x, y] = element.center;
|
|
585
|
+
libnut.moveMouse(Math.round(x), Math.round(y));
|
|
586
|
+
}
|
|
587
|
+
const scrollType = param?.scrollType;
|
|
588
|
+
const edgeSpec = scrollType && scrollType in EDGE_SCROLL_SPEC ? EDGE_SCROLL_SPEC[scrollType] : null;
|
|
589
|
+
if (edgeSpec) {
|
|
590
|
+
if (runPhasedScroll(edgeSpec.direction, EDGE_SCROLL_TOTAL_PX, EDGE_SCROLL_STEPS)) return void await sleep(SCROLL_COMPLETE_DELAY);
|
|
591
|
+
if (this.useAppleScript) {
|
|
592
|
+
sendKeyViaAppleScript(edgeSpec.key);
|
|
593
|
+
await sleep(SCROLL_COMPLETE_DELAY);
|
|
594
|
+
return;
|
|
595
|
+
}
|
|
596
|
+
const [dx, dy] = edgeSpec.libnut;
|
|
597
|
+
for(let i = 0; i < SCROLL_REPEAT_COUNT; i++){
|
|
598
|
+
libnut.scrollMouse(dx, dy);
|
|
599
|
+
await sleep(SCROLL_STEP_DELAY);
|
|
600
|
+
}
|
|
601
|
+
return;
|
|
602
|
+
}
|
|
603
|
+
if ('singleAction' === scrollType || !scrollType) {
|
|
604
|
+
const distance = param?.distance || 500;
|
|
605
|
+
const direction = param?.direction || 'down';
|
|
606
|
+
const isKnownDirection = 'up' === direction || 'down' === direction || 'left' === direction || 'right' === direction;
|
|
607
|
+
if (isKnownDirection) {
|
|
608
|
+
const steps = Math.max(PHASED_MIN_STEPS, Math.round(distance / PHASED_PIXELS_PER_STEP));
|
|
609
|
+
if (runPhasedScroll(direction, distance, steps)) return void await sleep(SCROLL_COMPLETE_DELAY);
|
|
610
|
+
}
|
|
611
|
+
if (this.useAppleScript && ('up' === direction || 'down' === direction)) {
|
|
612
|
+
const pages = Math.max(1, Math.round(distance / APPROX_VIEWPORT_HEIGHT_PX));
|
|
613
|
+
const key = 'up' === direction ? 'pageup' : 'pagedown';
|
|
614
|
+
for(let i = 0; i < pages; i++){
|
|
615
|
+
sendKeyViaAppleScript(key);
|
|
616
|
+
await sleep(SCROLL_STEP_DELAY);
|
|
617
|
+
}
|
|
618
|
+
await sleep(SCROLL_COMPLETE_DELAY);
|
|
619
|
+
return;
|
|
620
|
+
}
|
|
621
|
+
const ticks = Math.ceil(distance / 100);
|
|
622
|
+
const directionMap = {
|
|
623
|
+
up: [
|
|
624
|
+
0,
|
|
625
|
+
ticks
|
|
626
|
+
],
|
|
627
|
+
down: [
|
|
628
|
+
0,
|
|
629
|
+
-ticks
|
|
630
|
+
],
|
|
631
|
+
left: [
|
|
632
|
+
-ticks,
|
|
633
|
+
0
|
|
634
|
+
],
|
|
635
|
+
right: [
|
|
636
|
+
ticks,
|
|
637
|
+
0
|
|
638
|
+
]
|
|
639
|
+
};
|
|
640
|
+
const [dx, dy] = directionMap[direction] || [
|
|
641
|
+
0,
|
|
642
|
+
-ticks
|
|
643
|
+
];
|
|
644
|
+
libnut.scrollMouse(dx, dy);
|
|
645
|
+
await sleep(SCROLL_COMPLETE_DELAY);
|
|
646
|
+
return;
|
|
647
|
+
}
|
|
648
|
+
throw new Error(`Unknown scroll type: ${scrollType}, param: ${JSON.stringify(param)}`);
|
|
649
|
+
}
|
|
540
650
|
actionSpace() {
|
|
541
651
|
const defaultActions = [
|
|
542
|
-
defineActionTap(async (param)=>{
|
543
|
-
node_assert(libnut, 'libnut not initialized');
|
|
544
|
-
const element = param.locate;
|
|
545
|
-
node_assert(element, 'Element not found, cannot tap');
|
|
546
|
-
const [x, y] = element.center;
|
|
547
|
-
const targetX = Math.round(x);
|
|
548
|
-
const targetY = Math.round(y);
|
|
549
|
-
await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_TAP, SMOOTH_MOVE_DELAY_TAP);
|
|
550
|
-
libnut.mouseToggle('down', 'left');
|
|
551
|
-
await sleep(CLICK_HOLD_DURATION);
|
|
552
|
-
libnut.mouseToggle('up', 'left');
|
|
553
|
-
}),
|
|
554
|
-
defineActionDoubleClick(async (param)=>{
|
|
555
|
-
node_assert(libnut, 'libnut not initialized');
|
|
556
|
-
const element = param.locate;
|
|
557
|
-
node_assert(element, 'Element not found, cannot double click');
|
|
558
|
-
const [x, y] = element.center;
|
|
559
|
-
libnut.moveMouse(Math.round(x), Math.round(y));
|
|
560
|
-
libnut.mouseClick('left', true);
|
|
561
|
-
}),
|
|
562
|
-
defineActionRightClick(async (param)=>{
|
|
563
|
-
node_assert(libnut, 'libnut not initialized');
|
|
564
|
-
const element = param.locate;
|
|
565
|
-
node_assert(element, 'Element not found, cannot right click');
|
|
566
|
-
const [x, y] = element.center;
|
|
567
|
-
libnut.moveMouse(Math.round(x), Math.round(y));
|
|
568
|
-
libnut.mouseClick('right');
|
|
569
|
-
}),
|
|
570
|
-
defineAction({
|
|
571
|
-
name: 'MouseMove',
|
|
572
|
-
description: 'Move the mouse to the element',
|
|
573
|
-
interfaceAlias: 'aiHover',
|
|
574
|
-
paramSchema: actionHoverParamSchema,
|
|
575
|
-
sample: {
|
|
576
|
-
locate: {
|
|
577
|
-
prompt: 'the navigation menu item "Products"'
|
|
578
|
-
}
|
|
579
|
-
},
|
|
580
|
-
call: async (param)=>{
|
|
581
|
-
node_assert(libnut, 'libnut not initialized');
|
|
582
|
-
const element = param.locate;
|
|
583
|
-
node_assert(element, 'Element not found, cannot move mouse');
|
|
584
|
-
const [x, y] = element.center;
|
|
585
|
-
const targetX = Math.round(x);
|
|
586
|
-
const targetY = Math.round(y);
|
|
587
|
-
await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_MOUSE_MOVE, SMOOTH_MOVE_DELAY_MOUSE_MOVE);
|
|
588
|
-
await sleep(MOUSE_MOVE_EFFECT_WAIT);
|
|
589
|
-
}
|
|
590
|
-
}),
|
|
591
|
-
defineAction({
|
|
592
|
-
name: 'Input',
|
|
593
|
-
description: 'Input text into the input field',
|
|
594
|
-
interfaceAlias: 'aiInput',
|
|
595
|
-
paramSchema: computerInputParamSchema,
|
|
596
|
-
sample: {
|
|
597
|
-
value: 'test@example.com',
|
|
598
|
-
locate: {
|
|
599
|
-
prompt: 'the email input field'
|
|
600
|
-
}
|
|
601
|
-
},
|
|
602
|
-
call: async (param)=>{
|
|
603
|
-
node_assert(libnut, 'libnut not initialized');
|
|
604
|
-
const element = param.locate;
|
|
605
|
-
if (element) {
|
|
606
|
-
const [x, y] = element.center;
|
|
607
|
-
libnut.moveMouse(Math.round(x), Math.round(y));
|
|
608
|
-
libnut.mouseClick('left');
|
|
609
|
-
await sleep(INPUT_FOCUS_DELAY);
|
|
610
|
-
if ('append' !== param.mode) {
|
|
611
|
-
if (this.useAppleScript) {
|
|
612
|
-
sendKeyViaAppleScript('a', [
|
|
613
|
-
'command'
|
|
614
|
-
]);
|
|
615
|
-
await sleep(50);
|
|
616
|
-
sendKeyViaAppleScript('backspace', []);
|
|
617
|
-
} else {
|
|
618
|
-
const modifier = 'darwin' === process.platform ? 'command' : 'control';
|
|
619
|
-
libnut.keyTap('a', [
|
|
620
|
-
modifier
|
|
621
|
-
]);
|
|
622
|
-
await sleep(50);
|
|
623
|
-
libnut.keyTap('backspace');
|
|
624
|
-
}
|
|
625
|
-
await sleep(INPUT_CLEAR_DELAY);
|
|
626
|
-
}
|
|
627
|
-
}
|
|
628
|
-
if ('clear' === param.mode) return;
|
|
629
|
-
if (!param.value) return;
|
|
630
|
-
await this.smartTypeString(param.value);
|
|
631
|
-
}
|
|
632
|
-
}),
|
|
633
|
-
defineActionScroll(async (param)=>{
|
|
634
|
-
node_assert(libnut, 'libnut not initialized');
|
|
635
|
-
if (param.locate) {
|
|
636
|
-
const element = param.locate;
|
|
637
|
-
const [x, y] = element.center;
|
|
638
|
-
libnut.moveMouse(Math.round(x), Math.round(y));
|
|
639
|
-
}
|
|
640
|
-
const scrollType = param?.scrollType;
|
|
641
|
-
const edgeSpec = scrollType && scrollType in EDGE_SCROLL_SPEC ? EDGE_SCROLL_SPEC[scrollType] : null;
|
|
642
|
-
if (edgeSpec) {
|
|
643
|
-
if (runPhasedScroll(edgeSpec.direction, EDGE_SCROLL_TOTAL_PX, EDGE_SCROLL_STEPS)) return void await sleep(SCROLL_COMPLETE_DELAY);
|
|
644
|
-
if (this.useAppleScript) {
|
|
645
|
-
sendKeyViaAppleScript(edgeSpec.key);
|
|
646
|
-
await sleep(SCROLL_COMPLETE_DELAY);
|
|
647
|
-
return;
|
|
648
|
-
}
|
|
649
|
-
const [dx, dy] = edgeSpec.libnut;
|
|
650
|
-
for(let i = 0; i < SCROLL_REPEAT_COUNT; i++){
|
|
651
|
-
libnut.scrollMouse(dx, dy);
|
|
652
|
-
await sleep(SCROLL_STEP_DELAY);
|
|
653
|
-
}
|
|
654
|
-
return;
|
|
655
|
-
}
|
|
656
|
-
if ('singleAction' === scrollType || !scrollType) {
|
|
657
|
-
const distance = param?.distance || 500;
|
|
658
|
-
const direction = param?.direction || 'down';
|
|
659
|
-
const isKnownDirection = 'up' === direction || 'down' === direction || 'left' === direction || 'right' === direction;
|
|
660
|
-
if (isKnownDirection) {
|
|
661
|
-
const steps = Math.max(PHASED_MIN_STEPS, Math.round(distance / PHASED_PIXELS_PER_STEP));
|
|
662
|
-
if (runPhasedScroll(direction, distance, steps)) return void await sleep(SCROLL_COMPLETE_DELAY);
|
|
663
|
-
}
|
|
664
|
-
if (this.useAppleScript && ('up' === direction || 'down' === direction)) {
|
|
665
|
-
const pages = Math.max(1, Math.round(distance / APPROX_VIEWPORT_HEIGHT_PX));
|
|
666
|
-
const key = 'up' === direction ? 'pageup' : 'pagedown';
|
|
667
|
-
for(let i = 0; i < pages; i++){
|
|
668
|
-
sendKeyViaAppleScript(key);
|
|
669
|
-
await sleep(SCROLL_STEP_DELAY);
|
|
670
|
-
}
|
|
671
|
-
await sleep(SCROLL_COMPLETE_DELAY);
|
|
672
|
-
return;
|
|
673
|
-
}
|
|
674
|
-
const ticks = Math.ceil(distance / 100);
|
|
675
|
-
const directionMap = {
|
|
676
|
-
up: [
|
|
677
|
-
0,
|
|
678
|
-
ticks
|
|
679
|
-
],
|
|
680
|
-
down: [
|
|
681
|
-
0,
|
|
682
|
-
-ticks
|
|
683
|
-
],
|
|
684
|
-
left: [
|
|
685
|
-
-ticks,
|
|
686
|
-
0
|
|
687
|
-
],
|
|
688
|
-
right: [
|
|
689
|
-
ticks,
|
|
690
|
-
0
|
|
691
|
-
]
|
|
692
|
-
};
|
|
693
|
-
const [dx, dy] = directionMap[direction] || [
|
|
694
|
-
0,
|
|
695
|
-
-ticks
|
|
696
|
-
];
|
|
697
|
-
libnut.scrollMouse(dx, dy);
|
|
698
|
-
await sleep(SCROLL_COMPLETE_DELAY);
|
|
699
|
-
return;
|
|
700
|
-
}
|
|
701
|
-
throw new Error(`Unknown scroll type: ${scrollType}, param: ${JSON.stringify(param)}`);
|
|
702
|
-
}),
|
|
703
|
-
defineActionKeyboardPress(async (param)=>{
|
|
704
|
-
node_assert(libnut, 'libnut not initialized');
|
|
705
|
-
if (param.locate) {
|
|
706
|
-
const [x, y] = param.locate.center;
|
|
707
|
-
libnut.moveMouse(Math.round(x), Math.round(y));
|
|
708
|
-
libnut.mouseClick('left');
|
|
709
|
-
await sleep(50);
|
|
710
|
-
}
|
|
711
|
-
const keys = param.keyName.split('+');
|
|
712
|
-
const modifiers = keys.slice(0, -1).map(normalizeKeyName);
|
|
713
|
-
const key = normalizePrimaryKey(keys[keys.length - 1]);
|
|
714
|
-
debugDevice('KeyboardPress', {
|
|
715
|
-
original: param.keyName,
|
|
716
|
-
key,
|
|
717
|
-
modifiers,
|
|
718
|
-
driver: this.useAppleScript ? "applescript" : 'libnut'
|
|
719
|
-
});
|
|
720
|
-
if (this.useAppleScript) sendKeyViaAppleScript(key, modifiers);
|
|
721
|
-
else if (modifiers.length > 0) libnut.keyTap(key, modifiers);
|
|
722
|
-
else libnut.keyTap(key);
|
|
723
|
-
}),
|
|
724
|
-
defineActionDragAndDrop(async (param)=>{
|
|
725
|
-
node_assert(libnut, 'libnut not initialized');
|
|
726
|
-
const from = param.from;
|
|
727
|
-
const to = param.to;
|
|
728
|
-
node_assert(from, 'missing "from" param for drag and drop');
|
|
729
|
-
node_assert(to, 'missing "to" param for drag and drop');
|
|
730
|
-
const [fromX, fromY] = from.center;
|
|
731
|
-
const [toX, toY] = to.center;
|
|
732
|
-
libnut.moveMouse(Math.round(fromX), Math.round(fromY));
|
|
733
|
-
libnut.mouseToggle('down', 'left');
|
|
734
|
-
await sleep(100);
|
|
735
|
-
libnut.moveMouse(Math.round(toX), Math.round(toY));
|
|
736
|
-
await sleep(100);
|
|
737
|
-
libnut.mouseToggle('up', 'left');
|
|
738
|
-
}),
|
|
739
|
-
defineActionClearInput(async (param)=>{
|
|
740
|
-
node_assert(libnut, 'libnut not initialized');
|
|
741
|
-
const element = param.locate;
|
|
742
|
-
node_assert(element, 'Element not found, cannot clear input');
|
|
743
|
-
const [x, y] = element.center;
|
|
744
|
-
libnut.moveMouse(Math.round(x), Math.round(y));
|
|
745
|
-
libnut.mouseClick('left');
|
|
746
|
-
await sleep(100);
|
|
747
|
-
if (this.useAppleScript) {
|
|
748
|
-
sendKeyViaAppleScript('a', [
|
|
749
|
-
'command'
|
|
750
|
-
]);
|
|
751
|
-
await sleep(50);
|
|
752
|
-
sendKeyViaAppleScript('backspace', []);
|
|
753
|
-
} else {
|
|
754
|
-
const modifier = 'darwin' === process.platform ? 'command' : 'control';
|
|
755
|
-
libnut.keyTap('a', [
|
|
756
|
-
modifier
|
|
757
|
-
]);
|
|
758
|
-
libnut.keyTap('backspace');
|
|
759
|
-
}
|
|
760
|
-
await sleep(50);
|
|
761
|
-
})
|
|
652
|
+
...defineActionsFromInputPrimitives(this.inputPrimitives)
|
|
762
653
|
];
|
|
763
654
|
const platformActions = Object.values(createPlatformActions());
|
|
764
655
|
const customActions = this.options?.customActions || [];
|
|
@@ -796,6 +687,88 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
|
|
|
796
687
|
_define_property(this, "xvfbCleanup", void 0);
|
|
797
688
|
_define_property(this, "useAppleScript", void 0);
|
|
798
689
|
_define_property(this, "uri", void 0);
|
|
690
|
+
_define_property(this, "inputPrimitives", {
|
|
691
|
+
pointer: {
|
|
692
|
+
tap: async ({ x, y })=>{
|
|
693
|
+
node_assert(libnut, 'libnut not initialized');
|
|
694
|
+
const targetX = Math.round(x);
|
|
695
|
+
const targetY = Math.round(y);
|
|
696
|
+
await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_TAP, SMOOTH_MOVE_DELAY_TAP);
|
|
697
|
+
libnut.mouseToggle('down', 'left');
|
|
698
|
+
await sleep(CLICK_HOLD_DURATION);
|
|
699
|
+
libnut.mouseToggle('up', 'left');
|
|
700
|
+
},
|
|
701
|
+
doubleClick: async ({ x, y })=>{
|
|
702
|
+
node_assert(libnut, 'libnut not initialized');
|
|
703
|
+
libnut.moveMouse(Math.round(x), Math.round(y));
|
|
704
|
+
libnut.mouseClick('left', true);
|
|
705
|
+
},
|
|
706
|
+
rightClick: async ({ x, y })=>{
|
|
707
|
+
node_assert(libnut, 'libnut not initialized');
|
|
708
|
+
libnut.moveMouse(Math.round(x), Math.round(y));
|
|
709
|
+
libnut.mouseClick('right');
|
|
710
|
+
},
|
|
711
|
+
hover: async ({ x, y })=>{
|
|
712
|
+
node_assert(libnut, 'libnut not initialized');
|
|
713
|
+
await smoothMoveMouse(Math.round(x), Math.round(y), SMOOTH_MOVE_STEPS_MOUSE_MOVE, SMOOTH_MOVE_DELAY_MOUSE_MOVE);
|
|
714
|
+
await sleep(MOUSE_MOVE_EFFECT_WAIT);
|
|
715
|
+
},
|
|
716
|
+
dragAndDrop: async (from, to)=>{
|
|
717
|
+
node_assert(libnut, 'libnut not initialized');
|
|
718
|
+
libnut.moveMouse(Math.round(from.x), Math.round(from.y));
|
|
719
|
+
libnut.mouseToggle('down', 'left');
|
|
720
|
+
await sleep(100);
|
|
721
|
+
libnut.moveMouse(Math.round(to.x), Math.round(to.y));
|
|
722
|
+
await sleep(100);
|
|
723
|
+
libnut.mouseToggle('up', 'left');
|
|
724
|
+
}
|
|
725
|
+
},
|
|
726
|
+
keyboard: {
|
|
727
|
+
typeText: async (value, opts)=>{
|
|
728
|
+
node_assert(libnut, 'libnut not initialized');
|
|
729
|
+
const element = opts?.target;
|
|
730
|
+
if (element) {
|
|
731
|
+
const [x, y] = element.center;
|
|
732
|
+
libnut.moveMouse(Math.round(x), Math.round(y));
|
|
733
|
+
libnut.mouseClick('left');
|
|
734
|
+
await sleep(INPUT_FOCUS_DELAY);
|
|
735
|
+
if (opts?.replace !== false) {
|
|
736
|
+
await this.selectAllAndDelete();
|
|
737
|
+
await sleep(INPUT_CLEAR_DELAY);
|
|
738
|
+
}
|
|
739
|
+
}
|
|
740
|
+
await this.smartTypeString(value);
|
|
741
|
+
},
|
|
742
|
+
keyboardPress: async (keyName, opts)=>{
|
|
743
|
+
node_assert(libnut, 'libnut not initialized');
|
|
744
|
+
const target = opts?.target;
|
|
745
|
+
if (target) {
|
|
746
|
+
const [x, y] = target.center;
|
|
747
|
+
libnut.moveMouse(Math.round(x), Math.round(y));
|
|
748
|
+
libnut.mouseClick('left');
|
|
749
|
+
await sleep(50);
|
|
750
|
+
}
|
|
751
|
+
await this.pressKeyboardShortcut(keyName);
|
|
752
|
+
},
|
|
753
|
+
clearInput: async (target)=>{
|
|
754
|
+
node_assert(libnut, 'libnut not initialized');
|
|
755
|
+
if (target) {
|
|
756
|
+
const element = target;
|
|
757
|
+
const [x, y] = element.center;
|
|
758
|
+
libnut.moveMouse(Math.round(x), Math.round(y));
|
|
759
|
+
libnut.mouseClick('left');
|
|
760
|
+
await sleep(100);
|
|
761
|
+
}
|
|
762
|
+
await this.selectAllAndDelete();
|
|
763
|
+
await sleep(50);
|
|
764
|
+
}
|
|
765
|
+
},
|
|
766
|
+
scroll: {
|
|
767
|
+
scroll: async (param)=>{
|
|
768
|
+
await this.performScroll(param);
|
|
769
|
+
}
|
|
770
|
+
}
|
|
771
|
+
});
|
|
799
772
|
this.options = options;
|
|
800
773
|
this.displayId = options?.displayId;
|
|
801
774
|
this.useAppleScript = 'darwin' === process.platform && options?.keyboardDriver !== 'libnut';
|
|
@@ -1171,132 +1144,7 @@ class RDPDevice {
|
|
|
1171
1144
|
}
|
|
1172
1145
|
actionSpace() {
|
|
1173
1146
|
const defaultActions = [
|
|
1174
|
-
defineActionTap(async ({ locate })=>{
|
1175
|
-
const element = this.requireLocate(locate, 'tap');
|
|
1176
|
-
await this.moveToElement(element, {
|
|
1177
|
-
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1178
|
-
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1179
|
-
});
|
|
1180
|
-
await this.backend.mouseButton('left', 'down');
|
|
1181
|
-
await sleep(device_CLICK_HOLD_DURATION);
|
|
1182
|
-
await this.backend.mouseButton('left', 'up');
|
|
1183
|
-
}),
|
|
1184
|
-
defineActionDoubleClick(async ({ locate })=>{
|
|
1185
|
-
const element = this.requireLocate(locate, 'double click');
|
|
1186
|
-
await this.moveToElement(element, {
|
|
1187
|
-
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1188
|
-
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1189
|
-
});
|
|
1190
|
-
await this.backend.mouseButton('left', 'doubleClick');
|
|
1191
|
-
}),
|
|
1192
|
-
defineActionRightClick(async ({ locate })=>{
|
|
1193
|
-
const element = this.requireLocate(locate, 'right click');
|
|
1194
|
-
await this.moveToElement(element, {
|
|
1195
|
-
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1196
|
-
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1197
|
-
});
|
|
1198
|
-
await this.backend.mouseButton('right', 'click');
|
|
1199
|
-
}),
|
|
1200
|
-
defineActionHover(async ({ locate })=>{
|
|
1201
|
-
const element = this.requireLocate(locate, 'hover');
|
|
1202
|
-
await this.moveToElement(element, {
|
|
1203
|
-
steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
|
|
1204
|
-
stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE,
|
|
1205
|
-
settleDelayMs: device_MOUSE_MOVE_EFFECT_WAIT
|
|
1206
|
-
});
|
|
1207
|
-
}),
|
|
1208
|
-
defineActionInput(async (param)=>{
|
|
1209
|
-
this.assertConnected();
|
|
1210
|
-
if (param.locate) {
|
|
1211
|
-
await this.moveToElement(param.locate, {
|
|
1212
|
-
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1213
|
-
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1214
|
-
});
|
|
1215
|
-
await this.backend.mouseButton('left', 'click');
|
|
1216
|
-
await sleep(device_INPUT_FOCUS_DELAY);
|
|
1217
|
-
}
|
|
1218
|
-
if ('typeOnly' !== param.mode) {
|
|
1219
|
-
await this.clearInput();
|
|
1220
|
-
await sleep(device_INPUT_CLEAR_DELAY);
|
|
1221
|
-
}
|
|
1222
|
-
if ('clear' === param.mode) return;
|
|
1223
|
-
if (param.value) await this.backend.typeText(param.value);
|
|
1224
|
-
}),
|
|
1225
|
-
defineActionClearInput(async ({ locate })=>{
|
|
1226
|
-
this.assertConnected();
|
|
1227
|
-
if (locate) {
|
|
1228
|
-
await this.moveToElement(locate, {
|
|
1229
|
-
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1230
|
-
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1231
|
-
});
|
|
1232
|
-
await this.backend.mouseButton('left', 'click');
|
|
1233
|
-
await sleep(device_INPUT_FOCUS_DELAY);
|
|
1234
|
-
}
|
|
1235
|
-
await this.clearInput();
|
|
1236
|
-
await sleep(device_INPUT_CLEAR_DELAY);
|
|
1237
|
-
}),
|
|
1238
|
-
defineActionKeyboardPress(async ({ locate, keyName })=>{
|
|
1239
|
-
this.assertConnected();
|
|
1240
|
-
if (locate) {
|
|
1241
|
-
await this.moveToElement(locate, {
|
|
1242
|
-
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1243
|
-
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1244
|
-
});
|
|
1245
|
-
await this.backend.mouseButton('left', 'click');
|
|
1246
|
-
}
|
|
1247
|
-
await this.backend.keyPress(keyName);
|
|
1248
|
-
}),
|
|
1249
|
-
defineActionScroll(async (param)=>{
|
|
1250
|
-
this.assertConnected();
|
|
1251
|
-
const target = param.locate;
|
|
1252
|
-
if (target) await this.moveToElement(target, {
|
|
1253
|
-
steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
|
|
1254
|
-
stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE
|
|
1255
|
-
});
|
|
1256
|
-
if (param.scrollType && 'singleAction' !== param.scrollType) {
|
|
1257
|
-
const direction = this.edgeScrollDirection(param.scrollType);
|
|
1258
|
-
for(let i = 0; i < device_EDGE_SCROLL_STEPS; i++)await this.performWheel(direction, DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
|
|
1259
|
-
await sleep(device_SCROLL_COMPLETE_DELAY);
|
|
1260
|
-
return;
|
|
1261
|
-
}
|
|
1262
|
-
await this.performWheel(param.direction || 'down', param.distance || DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
|
|
1263
|
-
await sleep(device_SCROLL_COMPLETE_DELAY);
|
|
1264
|
-
}),
|
|
1265
|
-
defineActionDragAndDrop(async ({ from, to })=>{
|
|
1266
|
-
this.assertConnected();
|
|
1267
|
-
const source = this.requireLocate(from, 'drag source');
|
|
1268
|
-
const target = this.requireLocate(to, 'drag target');
|
|
1269
|
-
await this.moveToElement(source, {
|
|
1270
|
-
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1271
|
-
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1272
|
-
});
|
|
1273
|
-
await this.backend.mouseButton('left', 'down');
|
|
1274
|
-
await sleep(DRAG_HOLD_DURATION);
|
|
1275
|
-
await this.moveToElement(target, {
|
|
1276
|
-
steps: SMOOTH_MOVE_STEPS_DRAG,
|
|
1277
|
-
stepDelayMs: SMOOTH_MOVE_DELAY_DRAG
|
|
1278
|
-
});
|
|
1279
|
-
await sleep(DRAG_HOLD_DURATION);
|
|
1280
|
-
await this.backend.mouseButton('left', 'up');
|
|
1281
|
-
}),
|
|
1282
|
-
defineAction({
|
|
1283
|
-
name: 'MiddleClick',
|
|
1284
|
-
description: 'Middle click the element',
|
|
1285
|
-
sample: {
|
|
1286
|
-
locate: {
|
|
1287
|
-
prompt: 'the browser tab close target'
|
|
1288
|
-
}
|
|
1289
|
-
},
|
|
1290
|
-
paramSchema: actionTapParamSchema,
|
|
1291
|
-
call: async ({ locate })=>{
|
|
1292
|
-
const element = this.requireLocate(locate, 'middle click');
|
|
1293
|
-
await this.moveToElement(element, {
|
|
1294
|
-
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1295
|
-
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1296
|
-
});
|
|
1297
|
-
await this.backend.mouseButton('middle', 'click');
|
|
1298
|
-
}
|
|
1299
|
-
}),
|
|
1147
|
+
...defineActionsFromInputPrimitives(this.inputPrimitives),
|
|
1300
1148
|
defineAction({
|
|
1301
1149
|
name: 'ListDisplays',
|
|
1302
1150
|
description: 'List all available displays/monitors',
|
|
@@ -1325,10 +1173,6 @@ class RDPDevice {
|
|
|
1325
1173
|
throwIfDestroyed() {
|
|
1326
1174
|
if (this.destroyed) throw new Error('RDPDevice has been destroyed');
|
|
1327
1175
|
}
|
|
1328
|
-
requireLocate(locate, actionName) {
|
|
1329
|
-
if (!locate) throw new Error(`Missing target element for ${actionName}`);
|
|
1330
|
-
return locate;
|
|
1331
|
-
}
|
|
1332
1176
|
async moveToElement(element, options) {
|
|
1333
1177
|
this.assertConnected();
|
|
1334
1178
|
const targetX = Math.round(element.center[0]);
|
|
@@ -1394,6 +1238,113 @@ class RDPDevice {
|
|
|
1394
1238
|
device_define_property(this, "destroyed", false);
|
|
1395
1239
|
device_define_property(this, "cursorPosition", void 0);
|
|
1396
1240
|
device_define_property(this, "uri", void 0);
|
|
1241
|
+
device_define_property(this, "inputPrimitives", {
|
|
1242
|
+
pointer: {
|
|
1243
|
+
tap: async ({ x, y })=>{
|
|
1244
|
+
await this.movePointer(Math.round(x), Math.round(y), {
|
|
1245
|
+
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1246
|
+
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1247
|
+
});
|
|
1248
|
+
await this.backend.mouseButton('left', 'down');
|
|
1249
|
+
await sleep(device_CLICK_HOLD_DURATION);
|
|
1250
|
+
await this.backend.mouseButton('left', 'up');
|
|
1251
|
+
},
|
|
1252
|
+
doubleClick: async ({ x, y })=>{
|
|
1253
|
+
await this.movePointer(Math.round(x), Math.round(y), {
|
|
1254
|
+
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1255
|
+
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1256
|
+
});
|
|
1257
|
+
await this.backend.mouseButton('left', 'doubleClick');
|
|
1258
|
+
},
|
|
1259
|
+
rightClick: async ({ x, y })=>{
|
|
1260
|
+
await this.movePointer(Math.round(x), Math.round(y), {
|
|
1261
|
+
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1262
|
+
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1263
|
+
});
|
|
1264
|
+
await this.backend.mouseButton('right', 'click');
|
|
1265
|
+
},
|
|
1266
|
+
hover: async ({ x, y })=>{
|
|
1267
|
+
await this.movePointer(Math.round(x), Math.round(y), {
|
|
1268
|
+
steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
|
|
1269
|
+
stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE,
|
|
1270
|
+
settleDelayMs: device_MOUSE_MOVE_EFFECT_WAIT
|
|
1271
|
+
});
|
|
1272
|
+
},
|
|
1273
|
+
dragAndDrop: async (from, to)=>{
|
|
1274
|
+
await this.movePointer(Math.round(from.x), Math.round(from.y), {
|
|
1275
|
+
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1276
|
+
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1277
|
+
});
|
|
1278
|
+
await this.backend.mouseButton('left', 'down');
|
|
1279
|
+
await sleep(DRAG_HOLD_DURATION);
|
|
1280
|
+
await this.movePointer(Math.round(to.x), Math.round(to.y), {
|
|
1281
|
+
steps: SMOOTH_MOVE_STEPS_DRAG,
|
|
1282
|
+
stepDelayMs: SMOOTH_MOVE_DELAY_DRAG
|
|
1283
|
+
});
|
|
1284
|
+
await sleep(DRAG_HOLD_DURATION);
|
|
1285
|
+
await this.backend.mouseButton('left', 'up');
|
|
1286
|
+
}
|
|
1287
|
+
},
|
|
1288
|
+
keyboard: {
|
|
1289
|
+
typeText: async (value, opts)=>{
|
|
1290
|
+
this.assertConnected();
|
|
1291
|
+
const target = opts?.target;
|
|
1292
|
+
if (target) {
|
|
1293
|
+
await this.inputPrimitives.pointer.tap({
|
|
1294
|
+
x: target.center[0],
|
|
1295
|
+
y: target.center[1]
|
|
1296
|
+
});
|
|
1297
|
+
await sleep(device_INPUT_FOCUS_DELAY);
|
|
1298
|
+
}
|
|
1299
|
+
if (opts?.replace !== false) {
|
|
1300
|
+
await this.clearInput();
|
|
1301
|
+
await sleep(device_INPUT_CLEAR_DELAY);
|
|
1302
|
+
}
|
|
1303
|
+
if (opts?.focusOnly || !value) return;
|
|
1304
|
+
await this.backend.typeText(value);
|
|
1305
|
+
},
|
|
1306
|
+
clearInput: async (target)=>{
|
|
1307
|
+
this.assertConnected();
|
|
1308
|
+
const element = target;
|
|
1309
|
+
if (element) {
|
|
1310
|
+
await this.inputPrimitives.pointer.tap({
|
|
1311
|
+
x: element.center[0],
|
|
1312
|
+
y: element.center[1]
|
|
1313
|
+
});
|
|
1314
|
+
await sleep(device_INPUT_FOCUS_DELAY);
|
|
1315
|
+
}
|
|
1316
|
+
await this.clearInput();
|
|
1317
|
+
await sleep(device_INPUT_CLEAR_DELAY);
|
|
1318
|
+
},
|
|
1319
|
+
keyboardPress: async (keyName, opts)=>{
|
|
1320
|
+
this.assertConnected();
|
|
1321
|
+
const target = opts?.target;
|
|
1322
|
+
if (target) await this.inputPrimitives.pointer.tap({
|
|
1323
|
+
x: target.center[0],
|
|
1324
|
+
y: target.center[1]
|
|
1325
|
+
});
|
|
1326
|
+
await this.backend.keyPress(keyName);
|
|
1327
|
+
}
|
|
1328
|
+
},
|
|
1329
|
+
scroll: {
|
|
1330
|
+
scroll: async (param)=>{
|
|
1331
|
+
this.assertConnected();
|
|
1332
|
+
const target = param.locate;
|
|
1333
|
+
if (target) await this.moveToElement(target, {
|
|
1334
|
+
steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
|
|
1335
|
+
stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE
|
|
1336
|
+
});
|
|
1337
|
+
if (param.scrollType && 'singleAction' !== param.scrollType) {
|
|
1338
|
+
const direction = this.edgeScrollDirection(param.scrollType);
|
|
1339
|
+
for(let i = 0; i < device_EDGE_SCROLL_STEPS; i++)await this.performWheel(direction, DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
|
|
1340
|
+
await sleep(device_SCROLL_COMPLETE_DELAY);
|
|
1341
|
+
return;
|
|
1342
|
+
}
|
|
1343
|
+
await this.performWheel(param.direction || 'down', param.distance || DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
|
|
1344
|
+
await sleep(device_SCROLL_COMPLETE_DELAY);
|
|
1345
|
+
}
|
|
1346
|
+
}
|
|
1347
|
+
});
|
|
1397
1348
|
this.options = {
|
|
1398
1349
|
port: 3389,
|
|
1399
1350
|
securityProtocol: 'auto',
|
|
@@ -1625,7 +1576,7 @@ class ComputerMCPServer extends BaseMCPServer {
|
|
|
1625
1576
|
constructor(toolsManager){
|
|
1626
1577
|
super({
|
|
1627
1578
|
name: '@midscene/computer-mcp',
|
|
1628
|
-
version: "1.8.1-beta-20260513084557.0",
|
|
1579
|
+
version: "1.8.1",
|
|
1629
1580
|
description: 'Control the computer desktop using natural language commands'
|
|
1630
1581
|
}, toolsManager);
|
|
1631
1582
|
}
|