@midscene/computer 1.8.0 → 1.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/cli.mjs +898 -274
- package/dist/es/index.mjs +450 -395
- package/dist/es/mcp-server.mjs +898 -274
- package/dist/lib/cli.js +879 -256
- package/dist/lib/index.js +452 -394
- package/dist/lib/mcp-server.js +880 -257
- package/dist/types/index.d.ts +34 -5
- package/dist/types/mcp-server.d.ts +25 -5
- package/package.json +3 -3
package/dist/es/index.mjs
CHANGED
|
@@ -4,8 +4,7 @@ import { existsSync } from "node:fs";
|
|
|
4
4
|
import { createRequire } from "node:module";
|
|
5
5
|
import { dirname, resolve as external_node_path_resolve } from "node:path";
|
|
6
6
|
import { fileURLToPath } from "node:url";
|
|
7
|
-
import { getMidsceneLocationSchema, z } from "@midscene/core";
|
|
8
|
-
import { actionHoverParamSchema, actionTapParamSchema, defineAction, defineActionClearInput, defineActionDoubleClick, defineActionDragAndDrop, defineActionHover, defineActionInput, defineActionKeyboardPress, defineActionRightClick, defineActionScroll, defineActionTap } from "@midscene/core/device";
|
|
7
|
+
import { defineAction, defineActionsFromInputPrimitives } from "@midscene/core/device";
|
|
9
8
|
import { sleep } from "@midscene/core/utils";
|
|
10
9
|
import { createImgBase64ByFormat } from "@midscene/shared/img";
|
|
11
10
|
import { getDebug } from "@midscene/shared/logger";
|
|
@@ -13,6 +12,7 @@ import screenshot_desktop from "screenshot-desktop";
|
|
|
13
12
|
import { Agent } from "@midscene/core/agent";
|
|
14
13
|
import { once } from "node:events";
|
|
15
14
|
import { createInterface } from "node:readline";
|
|
15
|
+
import { z } from "@midscene/core";
|
|
16
16
|
import { BaseMidsceneTools } from "@midscene/shared/mcp/base-tools";
|
|
17
17
|
import { overrideAIConfig } from "@midscene/shared/env";
|
|
18
18
|
const debugXvfb = getDebug('computer:xvfb');
|
|
@@ -92,15 +92,6 @@ function _define_property(obj, key, value) {
|
|
|
92
92
|
else obj[key] = value;
|
|
93
93
|
return obj;
|
|
94
94
|
}
|
|
95
|
-
const computerInputParamSchema = z.object({
|
|
96
|
-
value: z.string().describe('The text to input'),
|
|
97
|
-
mode: z["enum"]([
|
|
98
|
-
'replace',
|
|
99
|
-
'clear',
|
|
100
|
-
'append'
|
|
101
|
-
]).default('replace').optional().describe('Input mode: replace, clear, or append'),
|
|
102
|
-
locate: getMidsceneLocationSchema().describe('The input field to be filled').optional()
|
|
103
|
-
});
|
|
104
95
|
const SMOOTH_MOVE_STEPS_TAP = 8;
|
|
105
96
|
const SMOOTH_MOVE_STEPS_MOUSE_MOVE = 10;
|
|
106
97
|
const SMOOTH_MOVE_DELAY_TAP = 8;
|
|
@@ -411,7 +402,7 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
|
|
|
411
402
|
}
|
|
412
403
|
async healthCheck() {
|
|
413
404
|
console.log('[HealthCheck] Starting health check...');
|
|
414
|
-
console.log("[HealthCheck] @midscene/computer v1.8.0");
|
|
405
|
+
console.log("[HealthCheck] @midscene/computer v1.8.1");
|
|
415
406
|
console.log('[HealthCheck] Taking screenshot...');
|
|
416
407
|
const screenshotTimeout = 15000;
|
|
417
408
|
let timeoutId;
|
|
@@ -477,21 +468,38 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
|
|
|
477
468
|
debugDevice('Taking screenshot', {
|
|
478
469
|
displayId: this.displayId
|
|
479
470
|
});
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
471
|
+
const options = {
|
|
472
|
+
format: 'png'
|
|
473
|
+
};
|
|
474
|
+
if (void 0 !== this.displayId) if ('darwin' === process.platform) {
|
|
475
|
+
const screenIndex = Number(this.displayId);
|
|
476
|
+
if (!Number.isNaN(screenIndex)) options.screen = screenIndex;
|
|
477
|
+
} else options.screen = this.displayId;
|
|
478
|
+
debugDevice('Screenshot options', options);
|
|
479
|
+
const MAX_ATTEMPTS = 3;
|
|
480
|
+
const RETRY_DELAY_MS = 300;
|
|
481
|
+
let lastRawMessage = '';
|
|
482
|
+
for(let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++)try {
|
|
489
483
|
const buffer = await screenshot_desktop(options);
|
|
484
|
+
if (attempt > 1) debugDevice(`Screenshot succeeded on attempt ${attempt}`);
|
|
490
485
|
return createImgBase64ByFormat('png', buffer.toString('base64'));
|
|
491
486
|
} catch (error) {
|
|
492
|
-
|
|
493
|
-
|
|
487
|
+
lastRawMessage = error instanceof Error ? error.message : String(error);
|
|
488
|
+
const isMacTransient = 'darwin' === process.platform && /could not create image from display/i.test(lastRawMessage);
|
|
489
|
+
const willRetry = isMacTransient && attempt < MAX_ATTEMPTS;
|
|
490
|
+
debugDevice(`Screenshot attempt ${attempt} failed: ${lastRawMessage}${willRetry ? ' — retrying' : ''}`);
|
|
491
|
+
if (!willRetry) break;
|
|
492
|
+
await sleep(RETRY_DELAY_MS);
|
|
494
493
|
}
|
|
494
|
+
if ('darwin' === process.platform && /could not create image from display/i.test(lastRawMessage)) throw new Error(`Failed to take screenshot on macOS: the host process is missing Screen Recording permission, or the target display is locked/sleeping.
|
|
495
|
+
|
|
496
|
+
Please follow these steps:
|
|
497
|
+
1. Open System Settings > Privacy & Security > Screen Recording
|
|
498
|
+
2. Enable the application running this script (e.g., Terminal, iTerm2, VS Code, WebStorm, or Midscene Studio)
|
|
499
|
+
3. Fully quit and relaunch that application after granting permission — macOS only re-reads this permission on process launch.
|
|
500
|
+
|
|
501
|
+
Original error: ${lastRawMessage}`);
|
|
502
|
+
throw new Error(`Failed to take screenshot: ${lastRawMessage}`);
|
|
495
503
|
}
|
|
496
504
|
async size() {
|
|
497
505
|
node_assert(device_libnut, 'libnut not initialized');
|
|
@@ -537,228 +545,111 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
|
|
|
537
545
|
node_assert(device_libnut, 'libnut not initialized');
|
|
538
546
|
await this.typeViaClipboard(text);
|
|
539
547
|
}
|
|
548
|
+
async selectAllAndDelete() {
|
|
549
|
+
node_assert(device_libnut, 'libnut not initialized');
|
|
550
|
+
if (this.useAppleScript) {
|
|
551
|
+
sendKeyViaAppleScript('a', [
|
|
552
|
+
'command'
|
|
553
|
+
]);
|
|
554
|
+
await sleep(50);
|
|
555
|
+
sendKeyViaAppleScript('backspace', []);
|
|
556
|
+
return;
|
|
557
|
+
}
|
|
558
|
+
const modifier = 'darwin' === process.platform ? 'command' : 'control';
|
|
559
|
+
device_libnut.keyTap('a', [
|
|
560
|
+
modifier
|
|
561
|
+
]);
|
|
562
|
+
await sleep(50);
|
|
563
|
+
device_libnut.keyTap('backspace');
|
|
564
|
+
}
|
|
565
|
+
async pressKeyboardShortcut(keyName) {
|
|
566
|
+
node_assert(device_libnut, 'libnut not initialized');
|
|
567
|
+
const keys = keyName.split('+');
|
|
568
|
+
const modifiers = keys.slice(0, -1).map(normalizeKeyName);
|
|
569
|
+
const key = normalizePrimaryKey(keys[keys.length - 1]);
|
|
570
|
+
debugDevice('KeyboardPress', {
|
|
571
|
+
original: keyName,
|
|
572
|
+
key,
|
|
573
|
+
modifiers,
|
|
574
|
+
driver: this.useAppleScript ? "applescript" : 'libnut'
|
|
575
|
+
});
|
|
576
|
+
if (this.useAppleScript) sendKeyViaAppleScript(key, modifiers);
|
|
577
|
+
else if (modifiers.length > 0) device_libnut.keyTap(key, modifiers);
|
|
578
|
+
else device_libnut.keyTap(key);
|
|
579
|
+
}
|
|
580
|
+
async performScroll(param) {
|
|
581
|
+
node_assert(device_libnut, 'libnut not initialized');
|
|
582
|
+
if (param.locate) {
|
|
583
|
+
const element = param.locate;
|
|
584
|
+
const [x, y] = element.center;
|
|
585
|
+
device_libnut.moveMouse(Math.round(x), Math.round(y));
|
|
586
|
+
}
|
|
587
|
+
const scrollType = param?.scrollType;
|
|
588
|
+
const edgeSpec = scrollType && scrollType in EDGE_SCROLL_SPEC ? EDGE_SCROLL_SPEC[scrollType] : null;
|
|
589
|
+
if (edgeSpec) {
|
|
590
|
+
if (runPhasedScroll(edgeSpec.direction, EDGE_SCROLL_TOTAL_PX, EDGE_SCROLL_STEPS)) return void await sleep(SCROLL_COMPLETE_DELAY);
|
|
591
|
+
if (this.useAppleScript) {
|
|
592
|
+
sendKeyViaAppleScript(edgeSpec.key);
|
|
593
|
+
await sleep(SCROLL_COMPLETE_DELAY);
|
|
594
|
+
return;
|
|
595
|
+
}
|
|
596
|
+
const [dx, dy] = edgeSpec.libnut;
|
|
597
|
+
for(let i = 0; i < SCROLL_REPEAT_COUNT; i++){
|
|
598
|
+
device_libnut.scrollMouse(dx, dy);
|
|
599
|
+
await sleep(SCROLL_STEP_DELAY);
|
|
600
|
+
}
|
|
601
|
+
return;
|
|
602
|
+
}
|
|
603
|
+
if ('singleAction' === scrollType || !scrollType) {
|
|
604
|
+
const distance = param?.distance || 500;
|
|
605
|
+
const direction = param?.direction || 'down';
|
|
606
|
+
const isKnownDirection = 'up' === direction || 'down' === direction || 'left' === direction || 'right' === direction;
|
|
607
|
+
if (isKnownDirection) {
|
|
608
|
+
const steps = Math.max(PHASED_MIN_STEPS, Math.round(distance / PHASED_PIXELS_PER_STEP));
|
|
609
|
+
if (runPhasedScroll(direction, distance, steps)) return void await sleep(SCROLL_COMPLETE_DELAY);
|
|
610
|
+
}
|
|
611
|
+
if (this.useAppleScript && ('up' === direction || 'down' === direction)) {
|
|
612
|
+
const pages = Math.max(1, Math.round(distance / APPROX_VIEWPORT_HEIGHT_PX));
|
|
613
|
+
const key = 'up' === direction ? 'pageup' : 'pagedown';
|
|
614
|
+
for(let i = 0; i < pages; i++){
|
|
615
|
+
sendKeyViaAppleScript(key);
|
|
616
|
+
await sleep(SCROLL_STEP_DELAY);
|
|
617
|
+
}
|
|
618
|
+
await sleep(SCROLL_COMPLETE_DELAY);
|
|
619
|
+
return;
|
|
620
|
+
}
|
|
621
|
+
const ticks = Math.ceil(distance / 100);
|
|
622
|
+
const directionMap = {
|
|
623
|
+
up: [
|
|
624
|
+
0,
|
|
625
|
+
ticks
|
|
626
|
+
],
|
|
627
|
+
down: [
|
|
628
|
+
0,
|
|
629
|
+
-ticks
|
|
630
|
+
],
|
|
631
|
+
left: [
|
|
632
|
+
-ticks,
|
|
633
|
+
0
|
|
634
|
+
],
|
|
635
|
+
right: [
|
|
636
|
+
ticks,
|
|
637
|
+
0
|
|
638
|
+
]
|
|
639
|
+
};
|
|
640
|
+
const [dx, dy] = directionMap[direction] || [
|
|
641
|
+
0,
|
|
642
|
+
-ticks
|
|
643
|
+
];
|
|
644
|
+
device_libnut.scrollMouse(dx, dy);
|
|
645
|
+
await sleep(SCROLL_COMPLETE_DELAY);
|
|
646
|
+
return;
|
|
647
|
+
}
|
|
648
|
+
throw new Error(`Unknown scroll type: ${scrollType}, param: ${JSON.stringify(param)}`);
|
|
649
|
+
}
|
|
540
650
|
actionSpace() {
|
|
541
651
|
const defaultActions = [
|
|
542
|
-
defineActionTap(async (param)=>{
|
543
|
-
node_assert(device_libnut, 'libnut not initialized');
|
|
544
|
-
const element = param.locate;
|
|
545
|
-
node_assert(element, 'Element not found, cannot tap');
|
|
546
|
-
const [x, y] = element.center;
|
|
547
|
-
const targetX = Math.round(x);
|
|
548
|
-
const targetY = Math.round(y);
|
|
549
|
-
await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_TAP, SMOOTH_MOVE_DELAY_TAP);
|
|
550
|
-
device_libnut.mouseToggle('down', 'left');
|
|
551
|
-
await sleep(CLICK_HOLD_DURATION);
|
|
552
|
-
device_libnut.mouseToggle('up', 'left');
|
|
553
|
-
}),
|
|
554
|
-
defineActionDoubleClick(async (param)=>{
|
|
555
|
-
node_assert(device_libnut, 'libnut not initialized');
|
|
556
|
-
const element = param.locate;
|
|
557
|
-
node_assert(element, 'Element not found, cannot double click');
|
|
558
|
-
const [x, y] = element.center;
|
|
559
|
-
device_libnut.moveMouse(Math.round(x), Math.round(y));
|
|
560
|
-
device_libnut.mouseClick('left', true);
|
|
561
|
-
}),
|
|
562
|
-
defineActionRightClick(async (param)=>{
|
|
563
|
-
node_assert(device_libnut, 'libnut not initialized');
|
|
564
|
-
const element = param.locate;
|
|
565
|
-
node_assert(element, 'Element not found, cannot right click');
|
|
566
|
-
const [x, y] = element.center;
|
|
567
|
-
device_libnut.moveMouse(Math.round(x), Math.round(y));
|
|
568
|
-
device_libnut.mouseClick('right');
|
|
569
|
-
}),
|
|
570
|
-
defineAction({
|
|
571
|
-
name: 'MouseMove',
|
|
572
|
-
description: 'Move the mouse to the element',
|
|
573
|
-
interfaceAlias: 'aiHover',
|
|
574
|
-
paramSchema: actionHoverParamSchema,
|
|
575
|
-
sample: {
|
|
576
|
-
locate: {
|
|
577
|
-
prompt: 'the navigation menu item "Products"'
|
|
578
|
-
}
|
|
579
|
-
},
|
|
580
|
-
call: async (param)=>{
|
|
581
|
-
node_assert(device_libnut, 'libnut not initialized');
|
|
582
|
-
const element = param.locate;
|
|
583
|
-
node_assert(element, 'Element not found, cannot move mouse');
|
|
584
|
-
const [x, y] = element.center;
|
|
585
|
-
const targetX = Math.round(x);
|
|
586
|
-
const targetY = Math.round(y);
|
|
587
|
-
await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_MOUSE_MOVE, SMOOTH_MOVE_DELAY_MOUSE_MOVE);
|
|
588
|
-
await sleep(MOUSE_MOVE_EFFECT_WAIT);
|
|
589
|
-
}
|
|
590
|
-
}),
|
|
591
|
-
defineAction({
|
|
592
|
-
name: 'Input',
|
|
593
|
-
description: 'Input text into the input field',
|
|
594
|
-
interfaceAlias: 'aiInput',
|
|
595
|
-
paramSchema: computerInputParamSchema,
|
|
596
|
-
sample: {
|
|
597
|
-
value: 'test@example.com',
|
|
598
|
-
locate: {
|
|
599
|
-
prompt: 'the email input field'
|
|
600
|
-
}
|
|
601
|
-
},
|
|
602
|
-
call: async (param)=>{
|
|
603
|
-
node_assert(device_libnut, 'libnut not initialized');
|
|
604
|
-
const element = param.locate;
|
|
605
|
-
if (element) {
|
|
606
|
-
const [x, y] = element.center;
|
|
607
|
-
device_libnut.moveMouse(Math.round(x), Math.round(y));
|
|
608
|
-
device_libnut.mouseClick('left');
|
|
609
|
-
await sleep(INPUT_FOCUS_DELAY);
|
|
610
|
-
if ('append' !== param.mode) {
|
|
611
|
-
if (this.useAppleScript) {
|
|
612
|
-
sendKeyViaAppleScript('a', [
|
|
613
|
-
'command'
|
|
614
|
-
]);
|
|
615
|
-
await sleep(50);
|
|
616
|
-
sendKeyViaAppleScript('backspace', []);
|
|
617
|
-
} else {
|
|
618
|
-
const modifier = 'darwin' === process.platform ? 'command' : 'control';
|
|
619
|
-
device_libnut.keyTap('a', [
|
|
620
|
-
modifier
|
|
621
|
-
]);
|
|
622
|
-
await sleep(50);
|
|
623
|
-
device_libnut.keyTap('backspace');
|
|
624
|
-
}
|
|
625
|
-
await sleep(INPUT_CLEAR_DELAY);
|
|
626
|
-
}
|
|
627
|
-
}
|
|
628
|
-
if ('clear' === param.mode) return;
|
|
629
|
-
if (!param.value) return;
|
|
630
|
-
await this.smartTypeString(param.value);
|
|
631
|
-
}
|
|
632
|
-
}),
|
|
633
|
-
defineActionScroll(async (param)=>{
|
|
634
|
-
node_assert(device_libnut, 'libnut not initialized');
|
|
635
|
-
if (param.locate) {
|
|
636
|
-
const element = param.locate;
|
|
637
|
-
const [x, y] = element.center;
|
|
638
|
-
device_libnut.moveMouse(Math.round(x), Math.round(y));
|
|
639
|
-
}
|
|
640
|
-
const scrollType = param?.scrollType;
|
|
641
|
-
const edgeSpec = scrollType && scrollType in EDGE_SCROLL_SPEC ? EDGE_SCROLL_SPEC[scrollType] : null;
|
|
642
|
-
if (edgeSpec) {
|
|
643
|
-
if (runPhasedScroll(edgeSpec.direction, EDGE_SCROLL_TOTAL_PX, EDGE_SCROLL_STEPS)) return void await sleep(SCROLL_COMPLETE_DELAY);
|
|
644
|
-
if (this.useAppleScript) {
|
|
645
|
-
sendKeyViaAppleScript(edgeSpec.key);
|
|
646
|
-
await sleep(SCROLL_COMPLETE_DELAY);
|
|
647
|
-
return;
|
|
648
|
-
}
|
|
649
|
-
const [dx, dy] = edgeSpec.libnut;
|
|
650
|
-
for(let i = 0; i < SCROLL_REPEAT_COUNT; i++){
|
|
651
|
-
device_libnut.scrollMouse(dx, dy);
|
|
652
|
-
await sleep(SCROLL_STEP_DELAY);
|
|
653
|
-
}
|
|
654
|
-
return;
|
|
655
|
-
}
|
|
656
|
-
if ('singleAction' === scrollType || !scrollType) {
|
|
657
|
-
const distance = param?.distance || 500;
|
|
658
|
-
const direction = param?.direction || 'down';
|
|
659
|
-
const isKnownDirection = 'up' === direction || 'down' === direction || 'left' === direction || 'right' === direction;
|
|
660
|
-
if (isKnownDirection) {
|
|
661
|
-
const steps = Math.max(PHASED_MIN_STEPS, Math.round(distance / PHASED_PIXELS_PER_STEP));
|
|
662
|
-
if (runPhasedScroll(direction, distance, steps)) return void await sleep(SCROLL_COMPLETE_DELAY);
|
|
663
|
-
}
|
|
664
|
-
if (this.useAppleScript && ('up' === direction || 'down' === direction)) {
|
|
665
|
-
const pages = Math.max(1, Math.round(distance / APPROX_VIEWPORT_HEIGHT_PX));
|
|
666
|
-
const key = 'up' === direction ? 'pageup' : 'pagedown';
|
|
667
|
-
for(let i = 0; i < pages; i++){
|
|
668
|
-
sendKeyViaAppleScript(key);
|
|
669
|
-
await sleep(SCROLL_STEP_DELAY);
|
|
670
|
-
}
|
|
671
|
-
await sleep(SCROLL_COMPLETE_DELAY);
|
|
672
|
-
return;
|
|
673
|
-
}
|
|
674
|
-
const ticks = Math.ceil(distance / 100);
|
|
675
|
-
const directionMap = {
|
|
676
|
-
up: [
|
|
677
|
-
0,
|
|
678
|
-
ticks
|
|
679
|
-
],
|
|
680
|
-
down: [
|
|
681
|
-
0,
|
|
682
|
-
-ticks
|
|
683
|
-
],
|
|
684
|
-
left: [
|
|
685
|
-
-ticks,
|
|
686
|
-
0
|
|
687
|
-
],
|
|
688
|
-
right: [
|
|
689
|
-
ticks,
|
|
690
|
-
0
|
|
691
|
-
]
|
|
692
|
-
};
|
|
693
|
-
const [dx, dy] = directionMap[direction] || [
|
|
694
|
-
0,
|
|
695
|
-
-ticks
|
|
696
|
-
];
|
|
697
|
-
device_libnut.scrollMouse(dx, dy);
|
|
698
|
-
await sleep(SCROLL_COMPLETE_DELAY);
|
|
699
|
-
return;
|
|
700
|
-
}
|
|
701
|
-
throw new Error(`Unknown scroll type: ${scrollType}, param: ${JSON.stringify(param)}`);
|
|
702
|
-
}),
|
|
703
|
-
defineActionKeyboardPress(async (param)=>{
|
|
704
|
-
node_assert(device_libnut, 'libnut not initialized');
|
|
705
|
-
if (param.locate) {
|
|
706
|
-
const [x, y] = param.locate.center;
|
|
707
|
-
device_libnut.moveMouse(Math.round(x), Math.round(y));
|
|
708
|
-
device_libnut.mouseClick('left');
|
|
709
|
-
await sleep(50);
|
|
710
|
-
}
|
|
711
|
-
const keys = param.keyName.split('+');
|
|
712
|
-
const modifiers = keys.slice(0, -1).map(normalizeKeyName);
|
|
713
|
-
const key = normalizePrimaryKey(keys[keys.length - 1]);
|
|
714
|
-
debugDevice('KeyboardPress', {
|
|
715
|
-
original: param.keyName,
|
|
716
|
-
key,
|
|
717
|
-
modifiers,
|
|
718
|
-
driver: this.useAppleScript ? "applescript" : 'libnut'
|
|
719
|
-
});
|
|
720
|
-
if (this.useAppleScript) sendKeyViaAppleScript(key, modifiers);
|
|
721
|
-
else if (modifiers.length > 0) device_libnut.keyTap(key, modifiers);
|
|
722
|
-
else device_libnut.keyTap(key);
|
|
723
|
-
}),
|
|
724
|
-
defineActionDragAndDrop(async (param)=>{
|
|
725
|
-
node_assert(device_libnut, 'libnut not initialized');
|
|
726
|
-
const from = param.from;
|
|
727
|
-
const to = param.to;
|
|
728
|
-
node_assert(from, 'missing "from" param for drag and drop');
|
|
729
|
-
node_assert(to, 'missing "to" param for drag and drop');
|
|
730
|
-
const [fromX, fromY] = from.center;
|
|
731
|
-
const [toX, toY] = to.center;
|
|
732
|
-
device_libnut.moveMouse(Math.round(fromX), Math.round(fromY));
|
|
733
|
-
device_libnut.mouseToggle('down', 'left');
|
|
734
|
-
await sleep(100);
|
|
735
|
-
device_libnut.moveMouse(Math.round(toX), Math.round(toY));
|
|
736
|
-
await sleep(100);
|
|
737
|
-
device_libnut.mouseToggle('up', 'left');
|
|
738
|
-
}),
|
|
739
|
-
defineActionClearInput(async (param)=>{
|
|
740
|
-
node_assert(device_libnut, 'libnut not initialized');
|
|
741
|
-
const element = param.locate;
|
|
742
|
-
node_assert(element, 'Element not found, cannot clear input');
|
|
743
|
-
const [x, y] = element.center;
|
|
744
|
-
device_libnut.moveMouse(Math.round(x), Math.round(y));
|
|
745
|
-
device_libnut.mouseClick('left');
|
|
746
|
-
await sleep(100);
|
|
747
|
-
if (this.useAppleScript) {
|
|
748
|
-
sendKeyViaAppleScript('a', [
|
|
749
|
-
'command'
|
|
750
|
-
]);
|
|
751
|
-
await sleep(50);
|
|
752
|
-
sendKeyViaAppleScript('backspace', []);
|
|
753
|
-
} else {
|
|
754
|
-
const modifier = 'darwin' === process.platform ? 'command' : 'control';
|
|
755
|
-
device_libnut.keyTap('a', [
|
|
756
|
-
modifier
|
|
757
|
-
]);
|
|
758
|
-
device_libnut.keyTap('backspace');
|
|
759
|
-
}
|
|
760
|
-
await sleep(50);
|
|
761
|
-
})
|
|
652
|
+
...defineActionsFromInputPrimitives(this.inputPrimitives)
|
|
762
653
|
];
|
|
763
654
|
const platformActions = Object.values(createPlatformActions());
|
|
764
655
|
const customActions = this.options?.customActions || [];
|
|
@@ -796,6 +687,88 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
|
|
|
796
687
|
_define_property(this, "xvfbCleanup", void 0);
|
|
797
688
|
_define_property(this, "useAppleScript", void 0);
|
|
798
689
|
_define_property(this, "uri", void 0);
|
|
690
|
+
_define_property(this, "inputPrimitives", {
|
|
691
|
+
pointer: {
|
|
692
|
+
tap: async ({ x, y })=>{
|
|
693
|
+
node_assert(device_libnut, 'libnut not initialized');
|
|
694
|
+
const targetX = Math.round(x);
|
|
695
|
+
const targetY = Math.round(y);
|
|
696
|
+
await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_TAP, SMOOTH_MOVE_DELAY_TAP);
|
|
697
|
+
device_libnut.mouseToggle('down', 'left');
|
|
698
|
+
await sleep(CLICK_HOLD_DURATION);
|
|
699
|
+
device_libnut.mouseToggle('up', 'left');
|
|
700
|
+
},
|
|
701
|
+
doubleClick: async ({ x, y })=>{
|
|
702
|
+
node_assert(device_libnut, 'libnut not initialized');
|
|
703
|
+
device_libnut.moveMouse(Math.round(x), Math.round(y));
|
|
704
|
+
device_libnut.mouseClick('left', true);
|
|
705
|
+
},
|
|
706
|
+
rightClick: async ({ x, y })=>{
|
|
707
|
+
node_assert(device_libnut, 'libnut not initialized');
|
|
708
|
+
device_libnut.moveMouse(Math.round(x), Math.round(y));
|
|
709
|
+
device_libnut.mouseClick('right');
|
|
710
|
+
},
|
|
711
|
+
hover: async ({ x, y })=>{
|
|
712
|
+
node_assert(device_libnut, 'libnut not initialized');
|
|
713
|
+
await smoothMoveMouse(Math.round(x), Math.round(y), SMOOTH_MOVE_STEPS_MOUSE_MOVE, SMOOTH_MOVE_DELAY_MOUSE_MOVE);
|
|
714
|
+
await sleep(MOUSE_MOVE_EFFECT_WAIT);
|
|
715
|
+
},
|
|
716
|
+
dragAndDrop: async (from, to)=>{
|
|
717
|
+
node_assert(device_libnut, 'libnut not initialized');
|
|
718
|
+
device_libnut.moveMouse(Math.round(from.x), Math.round(from.y));
|
|
719
|
+
device_libnut.mouseToggle('down', 'left');
|
|
720
|
+
await sleep(100);
|
|
721
|
+
device_libnut.moveMouse(Math.round(to.x), Math.round(to.y));
|
|
722
|
+
await sleep(100);
|
|
723
|
+
device_libnut.mouseToggle('up', 'left');
|
|
724
|
+
}
|
|
725
|
+
},
|
|
726
|
+
keyboard: {
|
|
727
|
+
typeText: async (value, opts)=>{
|
|
728
|
+
node_assert(device_libnut, 'libnut not initialized');
|
|
729
|
+
const element = opts?.target;
|
|
730
|
+
if (element) {
|
|
731
|
+
const [x, y] = element.center;
|
|
732
|
+
device_libnut.moveMouse(Math.round(x), Math.round(y));
|
|
733
|
+
device_libnut.mouseClick('left');
|
|
734
|
+
await sleep(INPUT_FOCUS_DELAY);
|
|
735
|
+
if (opts?.replace !== false) {
|
|
736
|
+
await this.selectAllAndDelete();
|
|
737
|
+
await sleep(INPUT_CLEAR_DELAY);
|
|
738
|
+
}
|
|
739
|
+
}
|
|
740
|
+
await this.smartTypeString(value);
|
|
741
|
+
},
|
|
742
|
+
keyboardPress: async (keyName, opts)=>{
|
|
743
|
+
node_assert(device_libnut, 'libnut not initialized');
|
|
744
|
+
const target = opts?.target;
|
|
745
|
+
if (target) {
|
|
746
|
+
const [x, y] = target.center;
|
|
747
|
+
device_libnut.moveMouse(Math.round(x), Math.round(y));
|
|
748
|
+
device_libnut.mouseClick('left');
|
|
749
|
+
await sleep(50);
|
|
750
|
+
}
|
|
751
|
+
await this.pressKeyboardShortcut(keyName);
|
|
752
|
+
},
|
|
753
|
+
clearInput: async (target)=>{
|
|
754
|
+
node_assert(device_libnut, 'libnut not initialized');
|
|
755
|
+
if (target) {
|
|
756
|
+
const element = target;
|
|
757
|
+
const [x, y] = element.center;
|
|
758
|
+
device_libnut.moveMouse(Math.round(x), Math.round(y));
|
|
759
|
+
device_libnut.mouseClick('left');
|
|
760
|
+
await sleep(100);
|
|
761
|
+
}
|
|
762
|
+
await this.selectAllAndDelete();
|
|
763
|
+
await sleep(50);
|
|
764
|
+
}
|
|
765
|
+
},
|
|
766
|
+
scroll: {
|
|
767
|
+
scroll: async (param)=>{
|
|
768
|
+
await this.performScroll(param);
|
|
769
|
+
}
|
|
770
|
+
}
|
|
771
|
+
});
|
|
799
772
|
this.options = options;
|
|
800
773
|
this.displayId = options?.displayId;
|
|
801
774
|
this.useAppleScript = 'darwin' === process.platform && options?.keyboardDriver !== 'libnut';
|
|
@@ -1214,132 +1187,7 @@ class RDPDevice {
|
|
|
1214
1187
|
}
|
|
1215
1188
|
actionSpace() {
|
|
1216
1189
|
const defaultActions = [
|
|
1217
|
-
defineActionTap(async ({ locate })=>{
|
1218
|
-
const element = this.requireLocate(locate, 'tap');
|
|
1219
|
-
await this.moveToElement(element, {
|
|
1220
|
-
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1221
|
-
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1222
|
-
});
|
|
1223
|
-
await this.backend.mouseButton('left', 'down');
|
|
1224
|
-
await sleep(device_CLICK_HOLD_DURATION);
|
|
1225
|
-
await this.backend.mouseButton('left', 'up');
|
|
1226
|
-
}),
|
|
1227
|
-
defineActionDoubleClick(async ({ locate })=>{
|
|
1228
|
-
const element = this.requireLocate(locate, 'double click');
|
|
1229
|
-
await this.moveToElement(element, {
|
|
1230
|
-
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1231
|
-
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1232
|
-
});
|
|
1233
|
-
await this.backend.mouseButton('left', 'doubleClick');
|
|
1234
|
-
}),
|
|
1235
|
-
defineActionRightClick(async ({ locate })=>{
|
|
1236
|
-
const element = this.requireLocate(locate, 'right click');
|
|
1237
|
-
await this.moveToElement(element, {
|
|
1238
|
-
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1239
|
-
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1240
|
-
});
|
|
1241
|
-
await this.backend.mouseButton('right', 'click');
|
|
1242
|
-
}),
|
|
1243
|
-
defineActionHover(async ({ locate })=>{
|
|
1244
|
-
const element = this.requireLocate(locate, 'hover');
|
|
1245
|
-
await this.moveToElement(element, {
|
|
1246
|
-
steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
|
|
1247
|
-
stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE,
|
|
1248
|
-
settleDelayMs: device_MOUSE_MOVE_EFFECT_WAIT
|
|
1249
|
-
});
|
|
1250
|
-
}),
|
|
1251
|
-
defineActionInput(async (param)=>{
|
|
1252
|
-
this.assertConnected();
|
|
1253
|
-
if (param.locate) {
|
|
1254
|
-
await this.moveToElement(param.locate, {
|
|
1255
|
-
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1256
|
-
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1257
|
-
});
|
|
1258
|
-
await this.backend.mouseButton('left', 'click');
|
|
1259
|
-
await sleep(device_INPUT_FOCUS_DELAY);
|
|
1260
|
-
}
|
|
1261
|
-
if ('typeOnly' !== param.mode) {
|
|
1262
|
-
await this.clearInput();
|
|
1263
|
-
await sleep(device_INPUT_CLEAR_DELAY);
|
|
1264
|
-
}
|
|
1265
|
-
if ('clear' === param.mode) return;
|
|
1266
|
-
if (param.value) await this.backend.typeText(param.value);
|
|
1267
|
-
}),
|
|
1268
|
-
defineActionClearInput(async ({ locate })=>{
|
|
1269
|
-
this.assertConnected();
|
|
1270
|
-
if (locate) {
|
|
1271
|
-
await this.moveToElement(locate, {
|
|
1272
|
-
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1273
|
-
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1274
|
-
});
|
|
1275
|
-
await this.backend.mouseButton('left', 'click');
|
|
1276
|
-
await sleep(device_INPUT_FOCUS_DELAY);
|
|
1277
|
-
}
|
|
1278
|
-
await this.clearInput();
|
|
1279
|
-
await sleep(device_INPUT_CLEAR_DELAY);
|
|
1280
|
-
}),
|
|
1281
|
-
defineActionKeyboardPress(async ({ locate, keyName })=>{
|
|
1282
|
-
this.assertConnected();
|
|
1283
|
-
if (locate) {
|
|
1284
|
-
await this.moveToElement(locate, {
|
|
1285
|
-
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1286
|
-
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1287
|
-
});
|
|
1288
|
-
await this.backend.mouseButton('left', 'click');
|
|
1289
|
-
}
|
|
1290
|
-
await this.backend.keyPress(keyName);
|
|
1291
|
-
}),
|
|
1292
|
-
defineActionScroll(async (param)=>{
|
|
1293
|
-
this.assertConnected();
|
|
1294
|
-
const target = param.locate;
|
|
1295
|
-
if (target) await this.moveToElement(target, {
|
|
1296
|
-
steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
|
|
1297
|
-
stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE
|
|
1298
|
-
});
|
|
1299
|
-
if (param.scrollType && 'singleAction' !== param.scrollType) {
|
|
1300
|
-
const direction = this.edgeScrollDirection(param.scrollType);
|
|
1301
|
-
for(let i = 0; i < device_EDGE_SCROLL_STEPS; i++)await this.performWheel(direction, DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
|
|
1302
|
-
await sleep(device_SCROLL_COMPLETE_DELAY);
|
|
1303
|
-
return;
|
|
1304
|
-
}
|
|
1305
|
-
await this.performWheel(param.direction || 'down', param.distance || DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
|
|
1306
|
-
await sleep(device_SCROLL_COMPLETE_DELAY);
|
|
1307
|
-
}),
|
|
1308
|
-
defineActionDragAndDrop(async ({ from, to })=>{
|
|
1309
|
-
this.assertConnected();
|
|
1310
|
-
const source = this.requireLocate(from, 'drag source');
|
|
1311
|
-
const target = this.requireLocate(to, 'drag target');
|
|
1312
|
-
await this.moveToElement(source, {
|
|
1313
|
-
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1314
|
-
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1315
|
-
});
|
|
1316
|
-
await this.backend.mouseButton('left', 'down');
|
|
1317
|
-
await sleep(DRAG_HOLD_DURATION);
|
|
1318
|
-
await this.moveToElement(target, {
|
|
1319
|
-
steps: SMOOTH_MOVE_STEPS_DRAG,
|
|
1320
|
-
stepDelayMs: SMOOTH_MOVE_DELAY_DRAG
|
|
1321
|
-
});
|
|
1322
|
-
await sleep(DRAG_HOLD_DURATION);
|
|
1323
|
-
await this.backend.mouseButton('left', 'up');
|
|
1324
|
-
}),
|
|
1325
|
-
defineAction({
|
|
1326
|
-
name: 'MiddleClick',
|
|
1327
|
-
description: 'Middle click the element',
|
|
1328
|
-
sample: {
|
|
1329
|
-
locate: {
|
|
1330
|
-
prompt: 'the browser tab close target'
|
|
1331
|
-
}
|
|
1332
|
-
},
|
|
1333
|
-
paramSchema: actionTapParamSchema,
|
|
1334
|
-
call: async ({ locate })=>{
|
|
1335
|
-
const element = this.requireLocate(locate, 'middle click');
|
|
1336
|
-
await this.moveToElement(element, {
|
|
1337
|
-
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1338
|
-
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1339
|
-
});
|
|
1340
|
-
await this.backend.mouseButton('middle', 'click');
|
|
1341
|
-
}
|
|
1342
|
-
}),
|
|
1190
|
+
...defineActionsFromInputPrimitives(this.inputPrimitives),
|
|
1343
1191
|
defineAction({
|
|
1344
1192
|
name: 'ListDisplays',
|
|
1345
1193
|
description: 'List all available displays/monitors',
|
|
@@ -1368,10 +1216,6 @@ class RDPDevice {
|
|
|
1368
1216
|
throwIfDestroyed() {
|
|
1369
1217
|
if (this.destroyed) throw new Error('RDPDevice has been destroyed');
|
|
1370
1218
|
}
|
|
1371
|
-
requireLocate(locate, actionName) {
|
|
1372
|
-
if (!locate) throw new Error(`Missing target element for ${actionName}`);
|
|
1373
|
-
return locate;
|
|
1374
|
-
}
|
|
1375
1219
|
async moveToElement(element, options) {
|
|
1376
1220
|
this.assertConnected();
|
|
1377
1221
|
const targetX = Math.round(element.center[0]);
|
|
@@ -1437,6 +1281,113 @@ class RDPDevice {
|
|
|
1437
1281
|
device_define_property(this, "destroyed", false);
|
|
1438
1282
|
device_define_property(this, "cursorPosition", void 0);
|
|
1439
1283
|
device_define_property(this, "uri", void 0);
|
|
1284
|
+
device_define_property(this, "inputPrimitives", {
|
|
1285
|
+
pointer: {
|
|
1286
|
+
tap: async ({ x, y })=>{
|
|
1287
|
+
await this.movePointer(Math.round(x), Math.round(y), {
|
|
1288
|
+
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1289
|
+
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1290
|
+
});
|
|
1291
|
+
await this.backend.mouseButton('left', 'down');
|
|
1292
|
+
await sleep(device_CLICK_HOLD_DURATION);
|
|
1293
|
+
await this.backend.mouseButton('left', 'up');
|
|
1294
|
+
},
|
|
1295
|
+
doubleClick: async ({ x, y })=>{
|
|
1296
|
+
await this.movePointer(Math.round(x), Math.round(y), {
|
|
1297
|
+
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1298
|
+
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1299
|
+
});
|
|
1300
|
+
await this.backend.mouseButton('left', 'doubleClick');
|
|
1301
|
+
},
|
|
1302
|
+
rightClick: async ({ x, y })=>{
|
|
1303
|
+
await this.movePointer(Math.round(x), Math.round(y), {
|
|
1304
|
+
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1305
|
+
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1306
|
+
});
|
|
1307
|
+
await this.backend.mouseButton('right', 'click');
|
|
1308
|
+
},
|
|
1309
|
+
hover: async ({ x, y })=>{
|
|
1310
|
+
await this.movePointer(Math.round(x), Math.round(y), {
|
|
1311
|
+
steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
|
|
1312
|
+
stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE,
|
|
1313
|
+
settleDelayMs: device_MOUSE_MOVE_EFFECT_WAIT
|
|
1314
|
+
});
|
|
1315
|
+
},
|
|
1316
|
+
dragAndDrop: async (from, to)=>{
|
|
1317
|
+
await this.movePointer(Math.round(from.x), Math.round(from.y), {
|
|
1318
|
+
steps: device_SMOOTH_MOVE_STEPS_TAP,
|
|
1319
|
+
stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
|
|
1320
|
+
});
|
|
1321
|
+
await this.backend.mouseButton('left', 'down');
|
|
1322
|
+
await sleep(DRAG_HOLD_DURATION);
|
|
1323
|
+
await this.movePointer(Math.round(to.x), Math.round(to.y), {
|
|
1324
|
+
steps: SMOOTH_MOVE_STEPS_DRAG,
|
|
1325
|
+
stepDelayMs: SMOOTH_MOVE_DELAY_DRAG
|
|
1326
|
+
});
|
|
1327
|
+
await sleep(DRAG_HOLD_DURATION);
|
|
1328
|
+
await this.backend.mouseButton('left', 'up');
|
|
1329
|
+
}
|
|
1330
|
+
},
|
|
1331
|
+
keyboard: {
|
|
1332
|
+
typeText: async (value, opts)=>{
|
|
1333
|
+
this.assertConnected();
|
|
1334
|
+
const target = opts?.target;
|
|
1335
|
+
if (target) {
|
|
1336
|
+
await this.inputPrimitives.pointer.tap({
|
|
1337
|
+
x: target.center[0],
|
|
1338
|
+
y: target.center[1]
|
|
1339
|
+
});
|
|
1340
|
+
await sleep(device_INPUT_FOCUS_DELAY);
|
|
1341
|
+
}
|
|
1342
|
+
if (opts?.replace !== false) {
|
|
1343
|
+
await this.clearInput();
|
|
1344
|
+
await sleep(device_INPUT_CLEAR_DELAY);
|
|
1345
|
+
}
|
|
1346
|
+
if (opts?.focusOnly || !value) return;
|
|
1347
|
+
await this.backend.typeText(value);
|
|
1348
|
+
},
|
|
1349
|
+
clearInput: async (target)=>{
|
|
1350
|
+
this.assertConnected();
|
|
1351
|
+
const element = target;
|
|
1352
|
+
if (element) {
|
|
1353
|
+
await this.inputPrimitives.pointer.tap({
|
|
1354
|
+
x: element.center[0],
|
|
1355
|
+
y: element.center[1]
|
|
1356
|
+
});
|
|
1357
|
+
await sleep(device_INPUT_FOCUS_DELAY);
|
|
1358
|
+
}
|
|
1359
|
+
await this.clearInput();
|
|
1360
|
+
await sleep(device_INPUT_CLEAR_DELAY);
|
|
1361
|
+
},
|
|
1362
|
+
keyboardPress: async (keyName, opts)=>{
|
|
1363
|
+
this.assertConnected();
|
|
1364
|
+
const target = opts?.target;
|
|
1365
|
+
if (target) await this.inputPrimitives.pointer.tap({
|
|
1366
|
+
x: target.center[0],
|
|
1367
|
+
y: target.center[1]
|
|
1368
|
+
});
|
|
1369
|
+
await this.backend.keyPress(keyName);
|
|
1370
|
+
}
|
|
1371
|
+
},
|
|
1372
|
+
scroll: {
|
|
1373
|
+
scroll: async (param)=>{
|
|
1374
|
+
this.assertConnected();
|
|
1375
|
+
const target = param.locate;
|
|
1376
|
+
if (target) await this.moveToElement(target, {
|
|
1377
|
+
steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
|
|
1378
|
+
stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE
|
|
1379
|
+
});
|
|
1380
|
+
if (param.scrollType && 'singleAction' !== param.scrollType) {
|
|
1381
|
+
const direction = this.edgeScrollDirection(param.scrollType);
|
|
1382
|
+
for(let i = 0; i < device_EDGE_SCROLL_STEPS; i++)await this.performWheel(direction, DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
|
|
1383
|
+
await sleep(device_SCROLL_COMPLETE_DELAY);
|
|
1384
|
+
return;
|
|
1385
|
+
}
|
|
1386
|
+
await this.performWheel(param.direction || 'down', param.distance || DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
|
|
1387
|
+
await sleep(device_SCROLL_COMPLETE_DELAY);
|
|
1388
|
+
}
|
|
1389
|
+
}
|
|
1390
|
+
});
|
|
1440
1391
|
this.options = {
|
|
1441
1392
|
port: 3389,
|
|
1442
1393
|
securityProtocol: 'auto',
|
|
@@ -1495,10 +1446,61 @@ function mcp_tools_define_property(obj, key, value) {
|
|
|
1495
1446
|
return obj;
|
|
1496
1447
|
}
|
|
1497
1448
|
const mcp_tools_debug = getDebug('mcp:computer-tools');
|
|
1449
|
+
const RDP_SECURITY_PROTOCOLS = [
|
|
1450
|
+
'auto',
|
|
1451
|
+
'tls',
|
|
1452
|
+
'nla',
|
|
1453
|
+
'rdp'
|
|
1454
|
+
];
|
|
1498
1455
|
const computerInitArgShape = {
|
|
1499
|
-
displayId: z.string().optional().describe('Display ID (from computer_list_displays)'),
|
|
1500
|
-
headless: z.boolean().optional().describe('Start virtual display via Xvfb (Linux only)')
|
|
1456
|
+
displayId: z.string().optional().describe('Display ID for local mode (from computer_list_displays). Ignored when host is set.'),
|
|
1457
|
+
headless: z.boolean().optional().describe('Start virtual display via Xvfb (Linux local mode only). Ignored when host is set.'),
|
|
1458
|
+
host: z.string().optional().describe('RDP host (FQDN or IP). Set this to switch into RDP mode.'),
|
|
1459
|
+
port: z.number().optional().describe('RDP port (default 3389). Requires host.'),
|
|
1460
|
+
username: z.string().optional().describe('RDP username. Requires host.'),
|
|
1461
|
+
password: z.string().optional().describe('RDP password. Requires host. Prefer setting via environment or a secrets manager.'),
|
|
1462
|
+
domain: z.string().optional().describe('RDP domain. Requires host.'),
|
|
1463
|
+
adminSession: z.boolean().optional().describe('Attach to the RDP admin/console session. Requires host.'),
|
|
1464
|
+
ignoreCertificate: z.boolean().optional().describe('Skip TLS certificate validation. Requires host.'),
|
|
1465
|
+
securityProtocol: z["enum"](RDP_SECURITY_PROTOCOLS).optional().describe('RDP security protocol negotiation (default auto). Requires host.'),
|
|
1466
|
+
desktopWidth: z.number().optional().describe('Remote desktop width in pixels. Requires host.'),
|
|
1467
|
+
desktopHeight: z.number().optional().describe('Remote desktop height in pixels. Requires host.')
|
|
1501
1468
|
};
|
|
1469
|
+
function adaptComputerInitArgs(extracted) {
|
|
1470
|
+
if (!extracted || 0 === Object.keys(extracted).length) return;
|
|
1471
|
+
if (extracted.host) {
|
|
1472
|
+
const { displayId: _d, headless: _h, ...rdpFields } = extracted;
|
|
1473
|
+
return {
|
|
1474
|
+
mode: 'rdp',
|
|
1475
|
+
...rdpFields,
|
|
1476
|
+
host: extracted.host
|
|
1477
|
+
};
|
|
1478
|
+
}
|
|
1479
|
+
return {
|
|
1480
|
+
mode: 'local',
|
|
1481
|
+
displayId: extracted.displayId,
|
|
1482
|
+
headless: extracted.headless
|
|
1483
|
+
};
|
|
1484
|
+
}
|
|
1485
|
+
function shouldRetargetAgent(opts) {
|
|
1486
|
+
if (!opts) return false;
|
|
1487
|
+
if ('rdp' === opts.mode) return true;
|
|
1488
|
+
return void 0 !== opts.displayId || void 0 !== opts.headless;
|
|
1489
|
+
}
|
|
1490
|
+
function describeConnectTarget(opts) {
|
|
1491
|
+
if (opts?.mode === 'rdp') {
|
|
1492
|
+
const portSuffix = opts.port ? `:${opts.port}` : '';
|
|
1493
|
+
const userSuffix = opts.username ? ` as ${opts.username}` : '';
|
|
1494
|
+
return ` via RDP (${opts.host}${portSuffix}${userSuffix})`;
|
|
1495
|
+
}
|
|
1496
|
+
if (opts?.mode === 'local' && opts.displayId) return ` (Display: ${opts.displayId})`;
|
|
1497
|
+
return ' (Primary display)';
|
|
1498
|
+
}
|
|
1499
|
+
function getCliReportSessionTarget(opts) {
|
|
1500
|
+
if (opts?.mode === 'rdp') return `rdp:${opts.host}`;
|
|
1501
|
+
if (opts?.mode === 'local' && opts.displayId) return opts.displayId;
|
|
1502
|
+
return 'primary';
|
|
1503
|
+
}
|
|
1502
1504
|
class ComputerMidsceneTools extends BaseMidsceneTools {
|
|
1503
1505
|
getCliReportSessionName() {
|
|
1504
1506
|
return 'midscene-computer';
|
|
@@ -1507,9 +1509,7 @@ class ComputerMidsceneTools extends BaseMidsceneTools {
|
|
|
1507
1509
|
return new ComputerDevice({});
|
|
1508
1510
|
}
|
|
1509
1511
|
async ensureAgent(opts) {
|
|
1510
|
-
|
|
1511
|
-
const headless = opts?.headless;
|
|
1512
|
-
if (this.agent && (void 0 !== displayId || void 0 !== headless)) {
|
|
1512
|
+
if (this.agent && shouldRetargetAgent(opts)) {
|
|
1513
1513
|
try {
|
|
1514
1514
|
await this.agent.destroy?.();
|
|
1515
1515
|
} catch (error) {
|
|
@@ -1518,8 +1518,20 @@ class ComputerMidsceneTools extends BaseMidsceneTools {
|
|
|
1518
1518
|
this.agent = void 0;
|
|
1519
1519
|
}
|
|
1520
1520
|
if (this.agent) return this.agent;
|
|
1521
|
-
mcp_tools_debug('Creating Computer agent with displayId:', displayId || 'primary');
|
|
1522
1521
|
const reportOptions = this.readCliReportAgentOptions();
|
|
1522
|
+
if (opts?.mode === 'rdp') {
|
|
1523
|
+
mcp_tools_debug('Creating RDP Computer agent for host:', opts.host);
|
|
1524
|
+
const { mode: _mode, ...rdpFields } = opts;
|
|
1525
|
+
const agent = await agentForRDPComputer({
|
|
1526
|
+
...rdpFields,
|
|
1527
|
+
...reportOptions ?? {}
|
|
1528
|
+
});
|
|
1529
|
+
this.agent = agent;
|
|
1530
|
+
return agent;
|
|
1531
|
+
}
|
|
1532
|
+
const displayId = opts?.mode === 'local' ? opts.displayId : void 0;
|
|
1533
|
+
const headless = opts?.mode === 'local' ? opts.headless : void 0;
|
|
1534
|
+
mcp_tools_debug('Creating Computer agent with displayId:', displayId || 'primary');
|
|
1523
1535
|
const agentOpts = {
|
|
1524
1536
|
...displayId ? {
|
|
1525
1537
|
displayId
|
|
@@ -1537,12 +1549,12 @@ class ComputerMidsceneTools extends BaseMidsceneTools {
|
|
|
1537
1549
|
return [
|
|
1538
1550
|
{
|
|
1539
1551
|
name: 'computer_connect',
|
|
1540
|
-
description:
|
|
1552
|
+
description: "Connect to a computer desktop. Default (local) mode controls the local machine; pass displayId to target a specific local display (see computer_list_displays). Pass host to switch to RDP mode and connect to a remote Windows desktop via the RDP helper binary. RDP-related options (port/username/password/domain/securityProtocol/ignoreCertificate/adminSession/desktopWidth/desktopHeight) only take effect when host is set.",
|
|
1541
1553
|
schema: this.getAgentInitArgSchema(),
|
|
1542
1554
|
cli: this.getAgentInitArgCliMetadata(),
|
|
1543
1555
|
handler: async (args)=>{
|
|
1544
1556
|
const initArgs = this.extractAgentInitParam(args);
|
|
1545
|
-
const reportSession = this.createNewCliReportSession(initArgs
|
|
1557
|
+
const reportSession = this.createNewCliReportSession(getCliReportSessionTarget(initArgs));
|
|
1546
1558
|
this.commitCliReportSession(reportSession);
|
|
1547
1559
|
if (this.agent) {
|
|
1548
1560
|
try {
|
|
@@ -1558,7 +1570,7 @@ class ComputerMidsceneTools extends BaseMidsceneTools {
|
|
|
1558
1570
|
content: [
|
|
1559
1571
|
{
|
|
1560
1572
|
type: 'text',
|
|
1561
|
-
text: `Connected to computer${
|
|
1573
|
+
text: `Connected to computer${describeConnectTarget(initArgs)}`
|
|
1562
1574
|
},
|
|
1563
1575
|
...this.buildScreenshotContent(screenshot)
|
|
1564
1576
|
]
|
|
@@ -1596,31 +1608,43 @@ class ComputerMidsceneTools extends BaseMidsceneTools {
|
|
|
1596
1608
|
cli: {
|
|
1597
1609
|
preferBareKeys: true
|
|
1598
1610
|
},
|
|
1599
|
-
adapt: (extracted)=>extracted
|
|
1611
|
+
adapt: (extracted)=>adaptComputerInitArgs(extracted)
|
|
1600
1612
|
});
|
|
1601
1613
|
}
|
|
1602
1614
|
}
|
|
1603
1615
|
function version() {
|
|
1604
|
-
const currentVersion = "1.8.
|
|
1616
|
+
const currentVersion = "1.8.1";
|
|
1605
1617
|
console.log(`@midscene/computer v${currentVersion}`);
|
|
1606
1618
|
return currentVersion;
|
|
1607
1619
|
}
|
|
1620
|
+
function loadMacPermissions() {
|
|
1621
|
+
if ('darwin' !== process.platform) return {
|
|
1622
|
+
permissions: null
|
|
1623
|
+
};
|
|
1624
|
+
try {
|
|
1625
|
+
const dynamicRequire = createRequire(import.meta.url);
|
|
1626
|
+
return {
|
|
1627
|
+
permissions: dynamicRequire('node-mac-permissions')
|
|
1628
|
+
};
|
|
1629
|
+
} catch (error) {
|
|
1630
|
+
return {
|
|
1631
|
+
permissions: null,
|
|
1632
|
+
loadError: error instanceof Error ? error.message : String(error)
|
|
1633
|
+
};
|
|
1634
|
+
}
|
|
1635
|
+
}
|
|
1608
1636
|
function checkAccessibilityPermission(promptIfNeeded = false) {
|
|
1609
1637
|
if ('darwin' !== process.platform) return {
|
|
1610
1638
|
hasPermission: true,
|
|
1611
1639
|
platform: process.platform
|
|
1612
1640
|
};
|
|
1613
1641
|
try {
|
|
1614
|
-
|
|
1615
|
-
|
|
1616
|
-
|
|
1617
|
-
|
|
1618
|
-
|
|
1619
|
-
|
|
1620
|
-
hasPermission: true,
|
|
1621
|
-
platform: process.platform
|
|
1622
|
-
};
|
|
1623
|
-
}
|
|
1642
|
+
const { permissions, loadError } = loadMacPermissions();
|
|
1643
|
+
if (!permissions) return {
|
|
1644
|
+
hasPermission: false,
|
|
1645
|
+
platform: process.platform,
|
|
1646
|
+
error: `Cannot verify macOS Accessibility permission: node-mac-permissions is unavailable${loadError ? ` (${loadError})` : ''}. The native module may need to be rebuilt for the current Node/Electron ABI.`
|
|
1647
|
+
};
|
|
1624
1648
|
const status = permissions.getAuthStatus('accessibility');
|
|
1625
1649
|
if ('authorized' === status) return {
|
|
1626
1650
|
hasPermission: true,
|
|
@@ -1640,6 +1664,37 @@ function checkAccessibilityPermission(promptIfNeeded = false) {
|
|
|
1640
1664
|
};
|
|
1641
1665
|
}
|
|
1642
1666
|
}
|
|
1667
|
+
function checkScreenRecordingPermission(promptIfNeeded = false) {
|
|
1668
|
+
if ('darwin' !== process.platform) return {
|
|
1669
|
+
hasPermission: true,
|
|
1670
|
+
platform: process.platform
|
|
1671
|
+
};
|
|
1672
|
+
try {
|
|
1673
|
+
const { permissions, loadError } = loadMacPermissions();
|
|
1674
|
+
if (!permissions) return {
|
|
1675
|
+
hasPermission: false,
|
|
1676
|
+
platform: process.platform,
|
|
1677
|
+
error: `Cannot verify macOS Screen Recording permission: node-mac-permissions is unavailable${loadError ? ` (${loadError})` : ''}. The native module may need to be rebuilt for the current Node/Electron ABI.`
|
|
1678
|
+
};
|
|
1679
|
+
const status = permissions.getAuthStatus('screen');
|
|
1680
|
+
if ('authorized' === status) return {
|
|
1681
|
+
hasPermission: true,
|
|
1682
|
+
platform: process.platform
|
|
1683
|
+
};
|
|
1684
|
+
if (promptIfNeeded) permissions.askForScreenCaptureAccess(true);
|
|
1685
|
+
return {
|
|
1686
|
+
hasPermission: false,
|
|
1687
|
+
platform: process.platform,
|
|
1688
|
+
error: `macOS Screen Recording permission is required (current status: ${status}).\n\nPlease follow these steps:\n1. Open System Settings > Privacy & Security > Screen Recording\n2. Enable the application running this script (e.g., Terminal, iTerm2, VS Code, WebStorm, or Midscene Studio)\n3. Fully quit and relaunch that application after granting permission — macOS only re-reads this permission on process launch.`
|
|
1689
|
+
};
|
|
1690
|
+
} catch (error) {
|
|
1691
|
+
return {
|
|
1692
|
+
hasPermission: false,
|
|
1693
|
+
platform: process.platform,
|
|
1694
|
+
error: `Failed to check screen recording permission: ${error instanceof Error ? error.message : String(error)}`
|
|
1695
|
+
};
|
|
1696
|
+
}
|
|
1697
|
+
}
|
|
1643
1698
|
async function checkComputerEnvironment() {
|
|
1644
1699
|
try {
|
|
1645
1700
|
const libnutModule = await import("@computer-use/libnut/dist/import_libnut.js");
|
|
@@ -1670,4 +1725,4 @@ async function checkComputerEnvironment() {
|
|
|
1670
1725
|
async function getConnectedDisplays() {
|
|
1671
1726
|
return ComputerDevice.listDisplays();
|
|
1672
1727
|
}
|
|
1673
|
-
export { ComputerAgent, ComputerDevice, ComputerMidsceneTools, HelperProcessRDPBackendClient, RDPDevice, UnsupportedRDPBackendClient, agentForComputer, agentForRDPComputer, agentFromComputer, checkAccessibilityPermission, checkComputerEnvironment, checkXvfbInstalled, createDefaultRDPBackendClient, getConnectedDisplays, needsXvfb, overrideAIConfig, version };
|
|
1728
|
+
export { ComputerAgent, ComputerDevice, ComputerMidsceneTools, HelperProcessRDPBackendClient, RDPDevice, UnsupportedRDPBackendClient, agentForComputer, agentForRDPComputer, agentFromComputer, checkAccessibilityPermission, checkComputerEnvironment, checkScreenRecordingPermission, checkXvfbInstalled, createDefaultRDPBackendClient, getConnectedDisplays, needsXvfb, overrideAIConfig, version };
|