@midscene/computer 1.8.0 → 1.8.1-beta-20260513084557.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/es/cli.mjs CHANGED
@@ -4,17 +4,17 @@ import { getDebug } from "@midscene/shared/logger";
4
4
  import { BaseMidsceneTools } from "@midscene/shared/mcp/base-tools";
5
5
  import { Agent } from "@midscene/core/agent";
6
6
  import node_assert from "node:assert";
7
- import { execFileSync, execSync, spawn as external_node_child_process_spawn, spawnSync } from "node:child_process";
8
- import { existsSync as external_node_fs_existsSync } from "node:fs";
7
+ import { execFileSync, execSync, spawn, spawnSync } from "node:child_process";
8
+ import { existsSync } from "node:fs";
9
9
  import { createRequire } from "node:module";
10
- import { dirname as external_node_path_dirname, resolve as external_node_path_resolve } from "node:path";
11
- import { fileURLToPath as external_node_url_fileURLToPath } from "node:url";
12
- import { actionHoverParamSchema, defineAction as device_defineAction, defineActionClearInput as device_defineActionClearInput, defineActionDoubleClick as device_defineActionDoubleClick, defineActionDragAndDrop as device_defineActionDragAndDrop, defineActionKeyboardPress as device_defineActionKeyboardPress, defineActionRightClick as device_defineActionRightClick, defineActionScroll as device_defineActionScroll, defineActionTap as device_defineActionTap } from "@midscene/core/device";
13
- import { sleep as utils_sleep } from "@midscene/core/utils";
10
+ import { dirname, resolve as external_node_path_resolve } from "node:path";
11
+ import { fileURLToPath } from "node:url";
12
+ import { actionHoverParamSchema, actionTapParamSchema, defineAction, defineActionClearInput, defineActionDoubleClick, defineActionDragAndDrop, defineActionHover, defineActionInput, defineActionKeyboardPress, defineActionRightClick, defineActionScroll, defineActionTap } from "@midscene/core/device";
13
+ import { sleep } from "@midscene/core/utils";
14
14
  import { createImgBase64ByFormat } from "@midscene/shared/img";
15
15
  import screenshot_desktop from "screenshot-desktop";
16
- import "node:events";
17
- import "node:readline";
16
+ import { once } from "node:events";
17
+ import { createInterface } from "node:readline";
18
18
  const debugXvfb = getDebug('computer:xvfb');
19
19
  function checkXvfbInstalled() {
20
20
  try {
@@ -27,7 +27,7 @@ function checkXvfbInstalled() {
27
27
  }
28
28
  }
29
29
  function findAvailableDisplay(startFrom = 99) {
30
- for(let n = startFrom; n < startFrom + 100; n++)if (!external_node_fs_existsSync(`/tmp/.X${n}-lock`)) return n;
30
+ for(let n = startFrom; n < startFrom + 100; n++)if (!existsSync(`/tmp/.X${n}-lock`)) return n;
31
31
  throw new Error(`No available display number found (checked ${startFrom} to ${startFrom + 99})`);
32
32
  }
33
33
  function startXvfb(options) {
@@ -36,7 +36,7 @@ function startXvfb(options) {
36
36
  const display = `:${displayNum}`;
37
37
  return new Promise((resolve, reject)=>{
38
38
  debugXvfb(`Starting Xvfb on display ${display} with resolution ${resolution}`);
39
- const xvfbProcess = external_node_child_process_spawn('Xvfb', [
39
+ const xvfbProcess = spawn('Xvfb', [
40
40
  display,
41
41
  '-screen',
42
42
  '0',
@@ -239,13 +239,13 @@ function getPhasedScrollBinary() {
239
239
  const require = createRequire(import.meta.url);
240
240
  let pkgRoot = null;
241
241
  try {
242
- pkgRoot = external_node_path_dirname(require.resolve('@midscene/computer/package.json'));
242
+ pkgRoot = dirname(require.resolve('@midscene/computer/package.json'));
243
243
  } catch {
244
- const hereDir = external_node_path_dirname(external_node_url_fileURLToPath(import.meta.url));
244
+ const hereDir = dirname(fileURLToPath(import.meta.url));
245
245
  for (const candidate of [
246
246
  external_node_path_resolve(hereDir, '..'),
247
247
  external_node_path_resolve(hereDir, '../..')
248
- ])if (external_node_fs_existsSync(external_node_path_resolve(candidate, 'package.json'))) {
248
+ ])if (existsSync(external_node_path_resolve(candidate, 'package.json'))) {
249
249
  pkgRoot = candidate;
250
250
  break;
251
251
  }
@@ -256,7 +256,7 @@ function getPhasedScrollBinary() {
256
256
  return null;
257
257
  }
258
258
  const binPath = external_node_path_resolve(pkgRoot, 'bin/darwin/phased-scroll');
259
- if (!external_node_fs_existsSync(binPath)) {
259
+ if (!existsSync(binPath)) {
260
260
  debugDevice('phased-scroll binary not found at', binPath);
261
261
  phasedScrollBinaryPath = null;
262
262
  return null;
@@ -299,7 +299,7 @@ async function smoothMoveMouse(targetX, targetY, steps, stepDelay) {
299
299
  const stepX = Math.round(currentPos.x + (targetX - currentPos.x) * i / steps);
300
300
  const stepY = Math.round(currentPos.y + (targetY - currentPos.y) * i / steps);
301
301
  libnut.moveMouse(stepX, stepY);
302
- await utils_sleep(stepDelay);
302
+ await sleep(stepDelay);
303
303
  }
304
304
  }
305
305
  const KEY_NAME_MAP = {
@@ -411,7 +411,7 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
411
411
  }
412
412
  async healthCheck() {
413
413
  console.log('[HealthCheck] Starting health check...');
414
- console.log("[HealthCheck] @midscene/computer v1.8.0");
414
+ console.log("[HealthCheck] @midscene/computer v1.8.1-beta-20260513084557.0");
415
415
  console.log('[HealthCheck] Taking screenshot...');
416
416
  const screenshotTimeout = 15000;
417
417
  let timeoutId;
@@ -433,7 +433,7 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
433
433
  const targetY = startPos.y + offsetY;
434
434
  console.log(`[HealthCheck] Moving mouse to (${targetX}, ${targetY})...`);
435
435
  libnut.moveMouse(targetX, targetY);
436
- await utils_sleep(50);
436
+ await sleep(50);
437
437
  const movedPos = libnut.getMousePos();
438
438
  console.log(`[HealthCheck] Mouse position after move: (${movedPos.x}, ${movedPos.y})`);
439
439
  const deltaX = Math.abs(movedPos.x - targetX);
@@ -516,7 +516,7 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
516
516
  const oldClipboard = await clipboardy.default.read().catch(()=>'');
517
517
  try {
518
518
  await clipboardy.default.write(text);
519
- await utils_sleep(50);
519
+ await sleep(50);
520
520
  if (this.useAppleScript) sendKeyViaAppleScript('v', [
521
521
  'command'
522
522
  ]);
@@ -526,7 +526,7 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
526
526
  modifier
527
527
  ]);
528
528
  }
529
- await utils_sleep(100);
529
+ await sleep(100);
530
530
  } finally{
531
531
  if (oldClipboard) await clipboardy.default.write(oldClipboard).catch(()=>{
532
532
  debugDevice('Failed to restore clipboard content');
@@ -539,7 +539,7 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
539
539
  }
540
540
  actionSpace() {
541
541
  const defaultActions = [
542
- device_defineActionTap(async (param)=>{
542
+ defineActionTap(async (param)=>{
543
543
  node_assert(libnut, 'libnut not initialized');
544
544
  const element = param.locate;
545
545
  node_assert(element, 'Element not found, cannot tap');
@@ -548,10 +548,10 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
548
548
  const targetY = Math.round(y);
549
549
  await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_TAP, SMOOTH_MOVE_DELAY_TAP);
550
550
  libnut.mouseToggle('down', 'left');
551
- await utils_sleep(CLICK_HOLD_DURATION);
551
+ await sleep(CLICK_HOLD_DURATION);
552
552
  libnut.mouseToggle('up', 'left');
553
553
  }),
554
- device_defineActionDoubleClick(async (param)=>{
554
+ defineActionDoubleClick(async (param)=>{
555
555
  node_assert(libnut, 'libnut not initialized');
556
556
  const element = param.locate;
557
557
  node_assert(element, 'Element not found, cannot double click');
@@ -559,7 +559,7 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
559
559
  libnut.moveMouse(Math.round(x), Math.round(y));
560
560
  libnut.mouseClick('left', true);
561
561
  }),
562
- device_defineActionRightClick(async (param)=>{
562
+ defineActionRightClick(async (param)=>{
563
563
  node_assert(libnut, 'libnut not initialized');
564
564
  const element = param.locate;
565
565
  node_assert(element, 'Element not found, cannot right click');
@@ -567,7 +567,7 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
567
567
  libnut.moveMouse(Math.round(x), Math.round(y));
568
568
  libnut.mouseClick('right');
569
569
  }),
570
- device_defineAction({
570
+ defineAction({
571
571
  name: 'MouseMove',
572
572
  description: 'Move the mouse to the element',
573
573
  interfaceAlias: 'aiHover',
@@ -585,10 +585,10 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
585
585
  const targetX = Math.round(x);
586
586
  const targetY = Math.round(y);
587
587
  await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_MOUSE_MOVE, SMOOTH_MOVE_DELAY_MOUSE_MOVE);
588
- await utils_sleep(MOUSE_MOVE_EFFECT_WAIT);
588
+ await sleep(MOUSE_MOVE_EFFECT_WAIT);
589
589
  }
590
590
  }),
591
- device_defineAction({
591
+ defineAction({
592
592
  name: 'Input',
593
593
  description: 'Input text into the input field',
594
594
  interfaceAlias: 'aiInput',
@@ -606,23 +606,23 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
606
606
  const [x, y] = element.center;
607
607
  libnut.moveMouse(Math.round(x), Math.round(y));
608
608
  libnut.mouseClick('left');
609
- await utils_sleep(INPUT_FOCUS_DELAY);
609
+ await sleep(INPUT_FOCUS_DELAY);
610
610
  if ('append' !== param.mode) {
611
611
  if (this.useAppleScript) {
612
612
  sendKeyViaAppleScript('a', [
613
613
  'command'
614
614
  ]);
615
- await utils_sleep(50);
615
+ await sleep(50);
616
616
  sendKeyViaAppleScript('backspace', []);
617
617
  } else {
618
618
  const modifier = 'darwin' === process.platform ? 'command' : 'control';
619
619
  libnut.keyTap('a', [
620
620
  modifier
621
621
  ]);
622
- await utils_sleep(50);
622
+ await sleep(50);
623
623
  libnut.keyTap('backspace');
624
624
  }
625
- await utils_sleep(INPUT_CLEAR_DELAY);
625
+ await sleep(INPUT_CLEAR_DELAY);
626
626
  }
627
627
  }
628
628
  if ('clear' === param.mode) return;
@@ -630,7 +630,7 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
630
630
  await this.smartTypeString(param.value);
631
631
  }
632
632
  }),
633
- device_defineActionScroll(async (param)=>{
633
+ defineActionScroll(async (param)=>{
634
634
  node_assert(libnut, 'libnut not initialized');
635
635
  if (param.locate) {
636
636
  const element = param.locate;
@@ -640,16 +640,16 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
640
640
  const scrollType = param?.scrollType;
641
641
  const edgeSpec = scrollType && scrollType in EDGE_SCROLL_SPEC ? EDGE_SCROLL_SPEC[scrollType] : null;
642
642
  if (edgeSpec) {
643
- if (runPhasedScroll(edgeSpec.direction, EDGE_SCROLL_TOTAL_PX, EDGE_SCROLL_STEPS)) return void await utils_sleep(SCROLL_COMPLETE_DELAY);
643
+ if (runPhasedScroll(edgeSpec.direction, EDGE_SCROLL_TOTAL_PX, EDGE_SCROLL_STEPS)) return void await sleep(SCROLL_COMPLETE_DELAY);
644
644
  if (this.useAppleScript) {
645
645
  sendKeyViaAppleScript(edgeSpec.key);
646
- await utils_sleep(SCROLL_COMPLETE_DELAY);
646
+ await sleep(SCROLL_COMPLETE_DELAY);
647
647
  return;
648
648
  }
649
649
  const [dx, dy] = edgeSpec.libnut;
650
650
  for(let i = 0; i < SCROLL_REPEAT_COUNT; i++){
651
651
  libnut.scrollMouse(dx, dy);
652
- await utils_sleep(SCROLL_STEP_DELAY);
652
+ await sleep(SCROLL_STEP_DELAY);
653
653
  }
654
654
  return;
655
655
  }
@@ -659,16 +659,16 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
659
659
  const isKnownDirection = 'up' === direction || 'down' === direction || 'left' === direction || 'right' === direction;
660
660
  if (isKnownDirection) {
661
661
  const steps = Math.max(PHASED_MIN_STEPS, Math.round(distance / PHASED_PIXELS_PER_STEP));
662
- if (runPhasedScroll(direction, distance, steps)) return void await utils_sleep(SCROLL_COMPLETE_DELAY);
662
+ if (runPhasedScroll(direction, distance, steps)) return void await sleep(SCROLL_COMPLETE_DELAY);
663
663
  }
664
664
  if (this.useAppleScript && ('up' === direction || 'down' === direction)) {
665
665
  const pages = Math.max(1, Math.round(distance / APPROX_VIEWPORT_HEIGHT_PX));
666
666
  const key = 'up' === direction ? 'pageup' : 'pagedown';
667
667
  for(let i = 0; i < pages; i++){
668
668
  sendKeyViaAppleScript(key);
669
- await utils_sleep(SCROLL_STEP_DELAY);
669
+ await sleep(SCROLL_STEP_DELAY);
670
670
  }
671
- await utils_sleep(SCROLL_COMPLETE_DELAY);
671
+ await sleep(SCROLL_COMPLETE_DELAY);
672
672
  return;
673
673
  }
674
674
  const ticks = Math.ceil(distance / 100);
@@ -695,18 +695,18 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
695
695
  -ticks
696
696
  ];
697
697
  libnut.scrollMouse(dx, dy);
698
- await utils_sleep(SCROLL_COMPLETE_DELAY);
698
+ await sleep(SCROLL_COMPLETE_DELAY);
699
699
  return;
700
700
  }
701
701
  throw new Error(`Unknown scroll type: ${scrollType}, param: ${JSON.stringify(param)}`);
702
702
  }),
703
- device_defineActionKeyboardPress(async (param)=>{
703
+ defineActionKeyboardPress(async (param)=>{
704
704
  node_assert(libnut, 'libnut not initialized');
705
705
  if (param.locate) {
706
706
  const [x, y] = param.locate.center;
707
707
  libnut.moveMouse(Math.round(x), Math.round(y));
708
708
  libnut.mouseClick('left');
709
- await utils_sleep(50);
709
+ await sleep(50);
710
710
  }
711
711
  const keys = param.keyName.split('+');
712
712
  const modifiers = keys.slice(0, -1).map(normalizeKeyName);
@@ -721,7 +721,7 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
721
721
  else if (modifiers.length > 0) libnut.keyTap(key, modifiers);
722
722
  else libnut.keyTap(key);
723
723
  }),
724
- device_defineActionDragAndDrop(async (param)=>{
724
+ defineActionDragAndDrop(async (param)=>{
725
725
  node_assert(libnut, 'libnut not initialized');
726
726
  const from = param.from;
727
727
  const to = param.to;
@@ -731,24 +731,24 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
731
731
  const [toX, toY] = to.center;
732
732
  libnut.moveMouse(Math.round(fromX), Math.round(fromY));
733
733
  libnut.mouseToggle('down', 'left');
734
- await utils_sleep(100);
734
+ await sleep(100);
735
735
  libnut.moveMouse(Math.round(toX), Math.round(toY));
736
- await utils_sleep(100);
736
+ await sleep(100);
737
737
  libnut.mouseToggle('up', 'left');
738
738
  }),
739
- device_defineActionClearInput(async (param)=>{
739
+ defineActionClearInput(async (param)=>{
740
740
  node_assert(libnut, 'libnut not initialized');
741
741
  const element = param.locate;
742
742
  node_assert(element, 'Element not found, cannot clear input');
743
743
  const [x, y] = element.center;
744
744
  libnut.moveMouse(Math.round(x), Math.round(y));
745
745
  libnut.mouseClick('left');
746
- await utils_sleep(100);
746
+ await sleep(100);
747
747
  if (this.useAppleScript) {
748
748
  sendKeyViaAppleScript('a', [
749
749
  'command'
750
750
  ]);
751
- await utils_sleep(50);
751
+ await sleep(50);
752
752
  sendKeyViaAppleScript('backspace', []);
753
753
  } else {
754
754
  const modifier = 'darwin' === process.platform ? 'command' : 'control';
@@ -757,7 +757,7 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
757
757
  ]);
758
758
  libnut.keyTap('backspace');
759
759
  }
760
- await utils_sleep(50);
760
+ await sleep(50);
761
761
  })
762
762
  ];
763
763
  const platformActions = Object.values(createPlatformActions());
@@ -803,15 +803,606 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
803
803
  }
804
804
  function createPlatformActions() {
805
805
  return {
806
- ListDisplays: device_defineAction({
806
+ ListDisplays: defineAction({
807
807
  name: 'ListDisplays',
808
808
  description: 'List all available displays/monitors',
809
809
  call: async ()=>await ComputerDevice.listDisplays()
810
810
  })
811
811
  };
812
812
  }
813
- getDebug('rdp:backend');
814
- getDebug('rdp:device');
813
/**
 * Location of the RDP helper executable for each supported
 * `process.platform` value: the platform sub-directory and the file name.
 */
const platformBinaryMap = {
  darwin: {
    directory: 'darwin',
    fileName: 'rdp-helper',
  },
  linux: {
    directory: 'linux',
    fileName: 'rdp-helper',
  },
  win32: {
    directory: 'win32',
    fileName: 'rdp-helper.exe',
  },
};

/**
 * Look up the helper binary descriptor for a platform identifier.
 *
 * @param {string} platform - A `process.platform`-style identifier.
 * @returns {{ directory: string, fileName: string } | undefined} The
 *   descriptor, or `undefined` when the platform is not supported.
 */
function getPlatformBinary(platform) {
  // Own-property check on purpose: the `in` operator also matches keys
  // inherited from Object.prototype ('constructor', 'toString', ...), which
  // would hand back a function instead of reporting "unsupported".
  if (Object.prototype.hasOwnProperty.call(platformBinaryMap, platform)) {
    return platformBinaryMap[platform];
  }
  return undefined;
}
830
/**
 * Directory containing the running module.
 *
 * Uses the CommonJS `__dirname` binding when one exists; otherwise derives
 * the directory from `import.meta.url`.
 *
 * @returns {string} Directory path of this module.
 */
function currentDirname() {
  const hasCommonJsDirname = typeof __dirname !== 'undefined';
  return hasCommonJsDirname ? __dirname : dirname(fileURLToPath(import.meta.url));
}
834
/**
 * Resolve the path of the RDP helper executable for the current platform.
 *
 * Two candidate package roots are probed, two and three directory levels
 * above this module; the first one containing
 * `bin/<platform dir>/<helper file>` wins.
 *
 * @returns {string} Path of the existing helper binary.
 * @throws {Error} When the platform has no helper entry, or when no candidate
 *   root contains the binary.
 */
function getRdpHelperBinaryPath() {
  const { platform } = process;
  const helper = getPlatformBinary(platform);
  if (!helper) {
    throw new Error(`@midscene/computer RDP helper does not support platform ${platform}`);
  }

  const baseDir = currentDirname();
  const located = ['../..', '../../..']
    .map((relativeRoot) => external_node_path_resolve(baseDir, relativeRoot))
    .map((root) => external_node_path_resolve(root, 'bin', helper.directory, helper.fileName))
    .find((candidate) => existsSync(candidate));
  if (located !== undefined) {
    return located;
  }

  throw new Error(`RDP helper binary not found for ${process.platform}. Run \`pnpm --filter @midscene/computer run build:native\` first.`);
}
848
/**
 * Set `obj[key]` to `value` the way a class field initialiser would.
 *
 * When the key is not yet reachable on the object (own or inherited) a plain
 * assignment is used; otherwise the key is (re)defined as an own, enumerable,
 * configurable, writable data property so inherited accessors or read-only
 * properties cannot intercept the write.
 *
 * @param {object} obj - Target object.
 * @param {PropertyKey} key - Property name.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj`.
 */
function backend_client_define_property(obj, key, value) {
  if (!(key in obj)) {
    obj[key] = value;
    return obj;
  }
  const descriptor = {
    value,
    enumerable: true,
    configurable: true,
    writable: true,
  };
  Object.defineProperty(obj, key, descriptor);
  return obj;
}
858
// Debug logger for the helper-process backend, created under the 'rdp:backend' namespace.
+ const debug = getDebug('rdp:backend');
859
// How long each shutdown stage waits for the helper's 'exit' event before escalating
// (stdin close, then SIGTERM, then SIGKILL); passed to setTimeout, so milliseconds.
+ const HELPER_SHUTDOWN_TIMEOUT_MS = 3000;
860
// Upper bound on buffered helper stderr lines; the oldest line is dropped once exceeded.
+ const MAX_STDERR_LINES = 40;
861
/**
 * RDP backend client that drives a helper child process over a
 * line-delimited JSON protocol.
 *
 * Every request is written to the helper's stdin as one JSON line of the form
 * `{ id, payload }`. Every non-empty stdout line is parsed as a response
 * `{ id, ok, payload }` or `{ id, ok: false, error: { message, code } }` and
 * matched to its request by `id`. The most recent stderr lines are buffered
 * and appended to errors raised by this class.
 */
class HelperProcessRDPBackendClient {
  /** Function used to spawn the helper (defaults to `child_process.spawn`). */
  spawnFn;
  /** Function returning the path of the helper executable. */
  resolveHelperPath;
  /** Currently active helper process, or `undefined` when none is running. */
  child = undefined;
  /** readline interface over the helper's stdout. */
  stdoutReader = undefined;
  /** readline interface over the helper's stderr. */
  stderrReader = undefined;
  /** In-flight requests keyed by request id: `{ resolve, reject }`. */
  pending = new Map();
  /** Most recent stderr lines emitted by the helper. */
  stderrLines = [];
  /** Counter used to build request ids (`req-<n>`). */
  nextRequestId = 0;
  /** True between a successful `connect` and the next disconnect/exit. */
  connected = false;
  /** Error recorded when the helper exits or fails to start. */
  fatalHelperError = undefined;

  /**
   * @param {object} [options]
   * @param {Function} [options.spawnFn] - Replacement for `child_process.spawn`.
   * @param {string} [options.helperPath] - Explicit helper executable path;
   *   when omitted the path is resolved via `getRdpHelperBinaryPath`.
   */
  constructor(options) {
    this.spawnFn = options?.spawnFn || spawn;
    const overridePath = options?.helperPath;
    this.resolveHelperPath = overridePath ? () => overridePath : getRdpHelperBinaryPath;
  }

  /**
   * Start the helper if needed and open the RDP session.
   *
   * @param {object} config - Connection settings forwarded to the helper as-is.
   * @returns {Promise<*>} The `info` field of the helper's `connected` response.
   * @throws {Error} When the helper answers with anything but `connected`.
   */
  async connect(config) {
    this.fatalHelperError = undefined;
    await this.ensureHelperStarted();
    const response = await this.send({
      type: 'connect',
      config,
    });
    if ('connected' !== response.type) throw new Error(`Expected connected response, got ${response.type}`);
    this.connected = true;
    this.fatalHelperError = undefined;
    return response.info;
  }

  /**
   * Close the RDP session (when connected) and shut the helper down.
   *
   * Errors that only say the helper is already gone are swallowed; any other
   * disconnect failure is rethrown after the helper has been shut down.
   */
  async disconnect() {
    const child = this.child;
    if (!child) return;
    let disconnectError;
    if (this.connected && null === child.exitCode) {
      try {
        const response = await this.send({
          type: 'disconnect',
        });
        this.expectOk(response, 'disconnect');
      } catch (error) {
        disconnectError = error instanceof Error ? error : new Error(String(error));
      }
    }
    this.connected = false;
    this.fatalHelperError = undefined;
    await this.shutdownHelper();
    if (disconnectError && !/RDP helper exited unexpectedly|RDP helper is not running|RDP helper shut down/u.test(disconnectError.message)) throw disconnectError;
  }

  /** @returns {Promise<string>} The `base64` field of the helper's `screenshot` response. */
  async screenshotBase64() {
    const response = await this.send({
      type: 'screenshot',
    });
    if ('screenshot' !== response.type) throw new Error(`Expected screenshot response, got ${response.type}`);
    return response.base64;
  }

  /** @returns {Promise<*>} The `size` field of the helper's `size` response. */
  async size() {
    const response = await this.send({
      type: 'size',
    });
    if ('size' !== response.type) throw new Error(`Expected size response, got ${response.type}`);
    return response.size;
  }

  /** Send a `mouseMove` request for the given coordinates. */
  async mouseMove(x, y) {
    const response = await this.send({
      type: 'mouseMove',
      x,
      y,
    });
    this.expectOk(response, 'mouseMove');
  }

  /** Send a `mouseButton` request (`button` and `action` are forwarded as-is). */
  async mouseButton(button, action) {
    const response = await this.send({
      type: 'mouseButton',
      button,
      action,
    });
    this.expectOk(response, 'mouseButton');
  }

  /** Send a `wheel` request; all arguments are forwarded as-is. */
  async wheel(direction, amount, x, y) {
    const response = await this.send({
      type: 'wheel',
      direction,
      amount,
      x,
      y,
    });
    this.expectOk(response, 'wheel');
  }

  /** Send a `keyPress` request for `keyName`. */
  async keyPress(keyName) {
    const response = await this.send({
      type: 'keyPress',
      keyName,
    });
    this.expectOk(response, 'keyPress');
  }

  /** Send a `typeText` request for `text`. */
  async typeText(text) {
    const response = await this.send({
      type: 'typeText',
      text,
    });
    this.expectOk(response, 'typeText');
  }

  /** Send a `clearInput` request. */
  async clearInput() {
    const response = await this.send({
      type: 'clearInput',
    });
    this.expectOk(response, 'clearInput');
  }

  /**
   * Spawn the helper unless a live one is already attached, and wire up its
   * stdout/stderr readers and lifecycle handlers.
   */
  async ensureHelperStarted() {
    if (this.child && null === this.child.exitCode) return;
    const helperPath = this.resolveHelperPath();
    debug('starting rdp helper', {
      helperPath,
    });
    const child = this.spawnFn(helperPath, [], {
      stdio: ['pipe', 'pipe', 'pipe'],
    });
    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');
    // A failed write (e.g. EPIPE after the helper died) is reported to the
    // write callback in send(), but the stream also emits 'error'; without a
    // listener that event would surface as an uncaught exception. Optional
    // call so injected test doubles without an emitter-style stdin keep working.
    child.stdin.on?.('error', (error) => {
      debug('rdp helper stdin error', error);
    });
    this.child = child;
    this.stderrLines.length = 0;
    this.stdoutReader = createInterface({
      input: child.stdout,
      crlfDelay: Number.POSITIVE_INFINITY,
    });
    this.stderrReader = createInterface({
      input: child.stderr,
      crlfDelay: Number.POSITIVE_INFINITY,
    });
    this.stdoutReader.on('line', (line) => {
      this.handleStdoutLine(line);
    });
    this.stderrReader.on('line', (line) => {
      this.captureStderrLine(line);
    });
    child.on('exit', (code, signal) => {
      // A helper that was abandoned earlier may exit after a replacement has
      // been started; it must not tear down the replacement's state.
      if (this.isSuperseded(child)) {
        debug('ignoring exit of superseded rdp helper', {
          code,
          signal,
        });
        return;
      }
      this.connected = false;
      const error = this.createHelperError(`RDP helper exited unexpectedly (code=${code}, signal=${signal})`);
      this.fatalHelperError = error;
      this.rejectPending(error);
      this.disposeReaders();
      this.child = undefined;
    });
    child.on('error', (error) => {
      if (this.isSuperseded(child)) {
        debug('ignoring error of superseded rdp helper', error);
        return;
      }
      this.connected = false;
      const helperError = this.createHelperError(`Failed to start RDP helper: ${error.message}`);
      this.fatalHelperError = helperError;
      this.rejectPending(helperError);
      this.disposeReaders();
      this.child = undefined;
    });
  }

  /**
   * True when a different helper process is active than `child`, i.e. events
   * from `child` belong to a process that has already been replaced.
   */
  isSuperseded(child) {
    return this.child !== undefined && this.child !== child;
  }

  /**
   * Handle one stdout line from the helper: parse it and settle the matching
   * pending request. Lines that are not a JSON object are treated as a
   * protocol failure that rejects all pending requests and shuts the helper down.
   */
  handleStdoutLine(line) {
    if (!line.trim()) return;
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      this.failProtocol(`RDP helper emitted malformed JSON: ${line}`);
      return;
    }
    // `null`, numbers, strings and booleans are valid JSON but carry no `id`;
    // dereferencing them would throw inside the stream's event handler.
    if (parsed === null || typeof parsed !== 'object') {
      this.failProtocol(`RDP helper emitted a non-object response: ${line}`);
      return;
    }
    const pending = this.pending.get(parsed.id);
    if (!pending) {
      debug('dropping response for unknown request id', parsed);
      return;
    }
    this.pending.delete(parsed.id);
    if (parsed.ok) {
      pending.resolve(parsed.payload);
      return;
    }
    // Tolerate a failure response without an `error` object instead of
    // crashing with a TypeError and leaving the caller's promise unsettled.
    const message = parsed.error?.message ?? 'RDP helper reported an unspecified error';
    pending.reject(this.createHelperError(message, parsed.error?.code));
  }

  /**
   * Reject everything in flight with a protocol error and shut the helper
   * down in the background. A failure of that background shutdown is logged
   * rather than left as an unhandled rejection.
   */
  failProtocol(message) {
    const protocolError = this.createHelperError(message);
    this.rejectPending(protocolError);
    this.shutdownHelper(protocolError).catch((shutdownError) => {
      debug('failed to shut down rdp helper after protocol error', shutdownError);
    });
  }

  /** Buffer a non-empty stderr line, keeping at most `MAX_STDERR_LINES` lines. */
  captureStderrLine(line) {
    if (!line.trim()) return;
    this.stderrLines.push(line);
    if (this.stderrLines.length > MAX_STDERR_LINES) this.stderrLines.shift();
  }

  /**
   * Write one request to the helper and wait for its response.
   *
   * @param {{ type: string }} payload - Request body; `type` selects the operation.
   * @returns {Promise<*>} The `payload` of the matching successful response.
   * @throws {Error} The recorded fatal helper error (for non-connect requests
   *   once the helper is gone), a write failure, or the helper's error reply.
   */
  async send(payload) {
    if ('connect' !== payload.type && this.fatalHelperError && (!this.child || null !== this.child.exitCode)) throw this.fatalHelperError;
    await this.ensureHelperStarted();
    const child = this.child;
    if (!child || null !== child.exitCode) throw this.createHelperError('RDP helper is not running');
    const id = `req-${++this.nextRequestId}`;
    const request = {
      id,
      payload,
    };
    return new Promise((resolve, reject) => {
      this.pending.set(id, {
        resolve,
        reject,
      });
      child.stdin.write(`${JSON.stringify(request)}\n`, (error) => {
        if (!error) return;
        this.pending.delete(id);
        reject(this.createHelperError(`Failed to send ${payload.type} request to RDP helper: ${error.message}`));
      });
    });
  }

  /** Throw unless `response.type` is `'ok'`. */
  expectOk(response, actionName) {
    if ('ok' !== response.type) throw new Error(`Expected ok response for ${actionName}, got ${response.type}`);
  }

  /** Reject every in-flight request with `error` and forget them. */
  rejectPending(error) {
    for (const { reject } of this.pending.values()) reject(error);
    this.pending.clear();
  }

  /**
   * Build an Error whose message is `message` followed by the buffered helper
   * stderr (when any); a truthy `code` becomes the error's `name`.
   */
  createHelperError(message, code) {
    const stderrSummary = this.stderrLines.join('\n').trim();
    const suffix = stderrSummary ? `\nHelper stderr:\n${stderrSummary}` : '';
    const error = new Error(`${message}${suffix}`);
    if (code) error.name = code;
    return error;
  }

  /** Close and drop both readline interfaces. */
  disposeReaders() {
    this.stdoutReader?.close();
    this.stderrReader?.close();
    this.stdoutReader = undefined;
    this.stderrReader = undefined;
  }

  /**
   * Wait for `child` to emit 'exit', for at most `timeoutMs`.
   *
   * The timer is always cleared and, on timeout, the 'exit' listener is
   * removed through the abort signal, so nothing keeps the event loop alive
   * or accumulates on the child after this returns.
   *
   * @returns {Promise<'exited' | 'timeout'>}
   * @throws Whatever `events.once` rejects with for reasons other than the
   *   timeout (e.g. the child emitting 'error').
   */
  async waitForExit(child, timeoutMs) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
      await once(child, 'exit', {
        signal: controller.signal,
      });
      return 'exited';
    } catch (error) {
      if (controller.signal.aborted) return 'timeout';
      throw error;
    } finally {
      clearTimeout(timer);
    }
  }

  /**
   * Detach the current helper and stop it, escalating from closing stdin to
   * SIGTERM to SIGKILL, waiting `HELPER_SHUTDOWN_TIMEOUT_MS` between stages.
   *
   * @param {Error} [rootError] - Error used to reject in-flight requests;
   *   defaults to a generic "RDP helper shut down" error.
   */
  async shutdownHelper(rootError) {
    const child = this.child;
    this.child = undefined;
    this.disposeReaders();
    if (!child) return;
    this.rejectPending(rootError || this.createHelperError('RDP helper shut down'));
    if (null !== child.exitCode) return;
    child.stdin.end();
    if ('timeout' !== (await this.waitForExit(child, HELPER_SHUTDOWN_TIMEOUT_MS))) return;
    child.kill('SIGTERM');
    if ('timeout' !== (await this.waitForExit(child, HELPER_SHUTDOWN_TIMEOUT_MS))) return;
    child.kill('SIGKILL');
    await once(child, 'exit');
  }
}
1107
/**
 * Factory for the default RDP backend client.
 *
 * @returns {HelperProcessRDPBackendClient} A client constructed without
 *   options.
 */
function createDefaultRDPBackendClient() {
  const client = new HelperProcessRDPBackendClient();
  return client;
}
1110
/**
 * Store `value` under `key` on `obj` with class-field semantics.
 *
 * A key that is already visible on the object (own or inherited) is redefined
 * as an own, enumerable, configurable, writable data property; a key that is
 * not visible yet is simply assigned.
 *
 * @param {object} obj - Target object.
 * @param {PropertyKey} key - Property name.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj`.
 */
function device_define_property(obj, key, value) {
  const alreadyVisible = key in obj;
  if (alreadyVisible) {
    Object.defineProperty(obj, key, {
      value,
      enumerable: true,
      configurable: true,
      writable: true,
    });
  } else {
    obj[key] = value;
  }
  return obj;
}
1120
// Debug logger for the RDP device, created under the 'rdp:device' namespace.
+ const device_debug = getDebug('rdp:device');
1121
// Smooth-move tuning: the *_STEPS_* values are passed as `steps` and the *_DELAY_* values
// as `stepDelayMs` to moveToElement, per action kind (tap-style actions, hover, drag target).
+ const device_SMOOTH_MOVE_STEPS_TAP = 8;
1122
+ const device_SMOOTH_MOVE_STEPS_MOUSE_MOVE = 10;
1123
+ const SMOOTH_MOVE_STEPS_DRAG = 12;
1124
+ const device_SMOOTH_MOVE_DELAY_TAP = 8;
1125
+ const device_SMOOTH_MOVE_DELAY_MOUSE_MOVE = 10;
1126
+ const SMOOTH_MOVE_DELAY_DRAG = 10;
1127
// Passed as `settleDelayMs` when hovering.
+ const device_MOUSE_MOVE_EFFECT_WAIT = 300;
1128
// Sleep between the left button 'down' and 'up' of a tap (presumably milliseconds — confirm against sleep()).
+ const device_CLICK_HOLD_DURATION = 50;
1129
// Sleep after pressing the button at the drag source and again before releasing at the target.
+ const DRAG_HOLD_DURATION = 100;
1130
// Sleep after clicking a located input/clear-input target, before clearing or typing.
+ const device_INPUT_FOCUS_DELAY = 300;
1131
// Sleep after clearInput() before continuing.
+ const device_INPUT_CLEAR_DELAY = 150;
1132
// NOTE(review): no use of this constant is visible in this part of the file — confirm where it applies.
+ const device_SCROLL_STEP_DELAY = 100;
1133
// Sleep after a scroll action's wheel calls have been issued.
+ const device_SCROLL_COMPLETE_DELAY = 500;
1134
// Wheel distance used when the scroll param has no `distance`, and for each edge-scroll step.
+ const DEFAULT_SCROLL_DISTANCE = 480;
1135
// Number of performWheel calls issued for a non-'singleAction' scroll type.
+ const device_EDGE_SCROLL_STEPS = 10;
1136
// NOTE(review): no use of this constant is visible in this part of the file — presumably the per-wheel-event amount; confirm.
+ const DEFAULT_SCROLL_STEP_AMOUNT = 120;
1137
+ class RDPDevice {
1138
+ describe() {
1139
+ const port = this.options.port || 3389;
1140
+ const username = this.options.username ? ` as ${this.options.username}` : '';
1141
+ const session = this.connectionInfo?.sessionId ? ` [session ${this.connectionInfo.sessionId}]` : '';
1142
+ return `RDP Device ${this.options.host}:${port}${username}${session}`;
1143
+ }
1144
+ async connect() {
1145
+ this.throwIfDestroyed();
1146
+ device_debug('connecting to rdp backend', {
1147
+ host: this.options.host,
1148
+ port: this.options.port,
1149
+ username: this.options.username
1150
+ });
1151
+ this.connectionInfo = await this.backend.connect(this.options);
1152
+ this.cursorPosition = [
1153
+ Math.round(this.connectionInfo.size.width / 2),
1154
+ Math.round(this.connectionInfo.size.height / 2)
1155
+ ];
1156
+ }
1157
+ async screenshotBase64() {
1158
+ this.assertConnected();
1159
+ return this.backend.screenshotBase64();
1160
+ }
1161
+ async size() {
1162
+ this.assertConnected();
1163
+ return this.backend.size();
1164
+ }
1165
+ async destroy() {
1166
+ if (this.destroyed) return;
1167
+ this.destroyed = true;
1168
+ this.connectionInfo = void 0;
1169
+ this.cursorPosition = void 0;
1170
+ await this.backend.disconnect();
1171
+ }
1172
+ actionSpace() {
1173
+ const defaultActions = [
1174
+ defineActionTap(async ({ locate })=>{
1175
+ const element = this.requireLocate(locate, 'tap');
1176
+ await this.moveToElement(element, {
1177
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1178
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1179
+ });
1180
+ await this.backend.mouseButton('left', 'down');
1181
+ await sleep(device_CLICK_HOLD_DURATION);
1182
+ await this.backend.mouseButton('left', 'up');
1183
+ }),
1184
+ defineActionDoubleClick(async ({ locate })=>{
1185
+ const element = this.requireLocate(locate, 'double click');
1186
+ await this.moveToElement(element, {
1187
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1188
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1189
+ });
1190
+ await this.backend.mouseButton('left', 'doubleClick');
1191
+ }),
1192
+ defineActionRightClick(async ({ locate })=>{
1193
+ const element = this.requireLocate(locate, 'right click');
1194
+ await this.moveToElement(element, {
1195
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1196
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1197
+ });
1198
+ await this.backend.mouseButton('right', 'click');
1199
+ }),
1200
+ defineActionHover(async ({ locate })=>{
1201
+ const element = this.requireLocate(locate, 'hover');
1202
+ await this.moveToElement(element, {
1203
+ steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
1204
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE,
1205
+ settleDelayMs: device_MOUSE_MOVE_EFFECT_WAIT
1206
+ });
1207
+ }),
1208
+ defineActionInput(async (param)=>{
1209
+ this.assertConnected();
1210
+ if (param.locate) {
1211
+ await this.moveToElement(param.locate, {
1212
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1213
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1214
+ });
1215
+ await this.backend.mouseButton('left', 'click');
1216
+ await sleep(device_INPUT_FOCUS_DELAY);
1217
+ }
1218
+ if ('typeOnly' !== param.mode) {
1219
+ await this.clearInput();
1220
+ await sleep(device_INPUT_CLEAR_DELAY);
1221
+ }
1222
+ if ('clear' === param.mode) return;
1223
+ if (param.value) await this.backend.typeText(param.value);
1224
+ }),
1225
+ defineActionClearInput(async ({ locate })=>{
1226
+ this.assertConnected();
1227
+ if (locate) {
1228
+ await this.moveToElement(locate, {
1229
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1230
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1231
+ });
1232
+ await this.backend.mouseButton('left', 'click');
1233
+ await sleep(device_INPUT_FOCUS_DELAY);
1234
+ }
1235
+ await this.clearInput();
1236
+ await sleep(device_INPUT_CLEAR_DELAY);
1237
+ }),
1238
+ defineActionKeyboardPress(async ({ locate, keyName })=>{
1239
+ this.assertConnected();
1240
+ if (locate) {
1241
+ await this.moveToElement(locate, {
1242
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1243
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1244
+ });
1245
+ await this.backend.mouseButton('left', 'click');
1246
+ }
1247
+ await this.backend.keyPress(keyName);
1248
+ }),
1249
+ defineActionScroll(async (param)=>{
1250
+ this.assertConnected();
1251
+ const target = param.locate;
1252
+ if (target) await this.moveToElement(target, {
1253
+ steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
1254
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE
1255
+ });
1256
+ if (param.scrollType && 'singleAction' !== param.scrollType) {
1257
+ const direction = this.edgeScrollDirection(param.scrollType);
1258
+ for(let i = 0; i < device_EDGE_SCROLL_STEPS; i++)await this.performWheel(direction, DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
1259
+ await sleep(device_SCROLL_COMPLETE_DELAY);
1260
+ return;
1261
+ }
1262
+ await this.performWheel(param.direction || 'down', param.distance || DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
1263
+ await sleep(device_SCROLL_COMPLETE_DELAY);
1264
+ }),
1265
+ defineActionDragAndDrop(async ({ from, to })=>{
1266
+ this.assertConnected();
1267
+ const source = this.requireLocate(from, 'drag source');
1268
+ const target = this.requireLocate(to, 'drag target');
1269
+ await this.moveToElement(source, {
1270
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1271
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1272
+ });
1273
+ await this.backend.mouseButton('left', 'down');
1274
+ await sleep(DRAG_HOLD_DURATION);
1275
+ await this.moveToElement(target, {
1276
+ steps: SMOOTH_MOVE_STEPS_DRAG,
1277
+ stepDelayMs: SMOOTH_MOVE_DELAY_DRAG
1278
+ });
1279
+ await sleep(DRAG_HOLD_DURATION);
1280
+ await this.backend.mouseButton('left', 'up');
1281
+ }),
1282
+ defineAction({
1283
+ name: 'MiddleClick',
1284
+ description: 'Middle click the element',
1285
+ sample: {
1286
+ locate: {
1287
+ prompt: 'the browser tab close target'
1288
+ }
1289
+ },
1290
+ paramSchema: actionTapParamSchema,
1291
+ call: async ({ locate })=>{
1292
+ const element = this.requireLocate(locate, 'middle click');
1293
+ await this.moveToElement(element, {
1294
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1295
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1296
+ });
1297
+ await this.backend.mouseButton('middle', 'click');
1298
+ }
1299
+ }),
1300
+ defineAction({
1301
+ name: 'ListDisplays',
1302
+ description: 'List all available displays/monitors',
1303
+ call: async ()=>{
1304
+ this.assertConnected();
1305
+ const size = await this.size();
1306
+ return [
1307
+ {
1308
+ id: this.connectionInfo?.sessionId || this.options.host,
1309
+ name: `RDP ${this.connectionInfo?.server || this.options.host} (${size.width}x${size.height})`,
1310
+ primary: true
1311
+ }
1312
+ ];
1313
+ }
1314
+ })
1315
+ ];
1316
+ return [
1317
+ ...defaultActions,
1318
+ ...this.options.customActions || []
1319
+ ];
1320
+ }
1321
+ assertConnected() {
1322
+ this.throwIfDestroyed();
1323
+ if (!this.connectionInfo) throw new Error('RDPDevice is not connected');
1324
+ }
1325
+ throwIfDestroyed() {
1326
+ if (this.destroyed) throw new Error('RDPDevice has been destroyed');
1327
+ }
1328
+ requireLocate(locate, actionName) {
1329
+ if (!locate) throw new Error(`Missing target element for ${actionName}`);
1330
+ return locate;
1331
+ }
1332
+ async moveToElement(element, options) {
1333
+ this.assertConnected();
1334
+ const targetX = Math.round(element.center[0]);
1335
+ const targetY = Math.round(element.center[1]);
1336
+ await this.movePointer(targetX, targetY, options);
1337
+ }
1338
+ async clearInput() {
1339
+ if (this.backend.clearInput) return void await this.backend.clearInput();
1340
+ await this.backend.keyPress('Control+A');
1341
+ await this.backend.keyPress('Backspace');
1342
+ }
1343
+ edgeScrollDirection(scrollType) {
1344
+ switch(scrollType){
1345
+ case 'scrollToTop':
1346
+ return 'up';
1347
+ case 'scrollToBottom':
1348
+ return 'down';
1349
+ case 'scrollToLeft':
1350
+ return 'left';
1351
+ case 'scrollToRight':
1352
+ return 'right';
1353
+ case 'singleAction':
1354
+ return 'down';
1355
+ default:
1356
+ throw new Error(`Unsupported scroll type: ${scrollType}`);
1357
+ }
1358
+ }
1359
+ async movePointer(targetX, targetY, options) {
1360
+ this.assertConnected();
1361
+ const start = this.cursorPosition || [
1362
+ targetX,
1363
+ targetY
1364
+ ];
1365
+ const steps = Math.max(1, options?.steps || 1);
1366
+ const stepDelayMs = options?.stepDelayMs || 0;
1367
+ for(let step = 1; step <= steps; step++){
1368
+ const x = Math.round(start[0] + (targetX - start[0]) * step / steps);
1369
+ const y = Math.round(start[1] + (targetY - start[1]) * step / steps);
1370
+ await this.backend.mouseMove(x, y);
1371
+ this.cursorPosition = [
1372
+ x,
1373
+ y
1374
+ ];
1375
+ if (stepDelayMs > 0 && step < steps) await sleep(stepDelayMs);
1376
+ }
1377
+ if (options?.settleDelayMs) await sleep(options.settleDelayMs);
1378
+ }
1379
+ async performWheel(direction, amount, x, y) {
1380
+ let remaining = Math.abs(amount);
1381
+ if (0 === remaining) remaining = DEFAULT_SCROLL_STEP_AMOUNT;
1382
+ while(remaining > 0){
1383
+ const chunk = Math.min(remaining, DEFAULT_SCROLL_STEP_AMOUNT);
1384
+ await this.backend.wheel(direction, chunk, x, y);
1385
+ remaining -= chunk;
1386
+ if (remaining > 0) await sleep(device_SCROLL_STEP_DELAY);
1387
+ }
1388
+ }
1389
+ constructor(options){
1390
+ device_define_property(this, "interfaceType", 'rdp');
1391
+ device_define_property(this, "options", void 0);
1392
+ device_define_property(this, "backend", void 0);
1393
+ device_define_property(this, "connectionInfo", void 0);
1394
+ device_define_property(this, "destroyed", false);
1395
+ device_define_property(this, "cursorPosition", void 0);
1396
+ device_define_property(this, "uri", void 0);
1397
+ this.options = {
1398
+ port: 3389,
1399
+ securityProtocol: 'auto',
1400
+ ignoreCertificate: false,
1401
+ ...options
1402
+ };
1403
+ this.backend = options.backend || createDefaultRDPBackendClient();
1404
+ }
1405
+ }
815
1406
  class ComputerAgent extends Agent {
816
1407
  }
817
1408
  function createLocalComputerDevice(opts) {
@@ -823,12 +1414,33 @@ function createLocalComputerDevice(opts) {
823
1414
  xvfbResolution: opts?.xvfbResolution
824
1415
  });
825
1416
  }
1417
/**
 * Builds an RDPDevice from agent options, forwarding only the RDP-related
 * fields. Every forwarded key is always present on the object handed to
 * RDPDevice; fields the caller did not set are passed as `undefined`.
 * @param {object} opts - Options containing host and optional RDP settings.
 * @returns {RDPDevice} A new, not yet connected, RDP device.
 */
function createRDPComputerDevice(opts) {
  const {
    host,
    port,
    username,
    password,
    domain,
    adminSession,
    ignoreCertificate,
    securityProtocol,
    desktopWidth,
    desktopHeight,
    backend,
    customActions,
  } = opts;
  const deviceOptions = {
    host,
    port,
    username,
    password,
    domain,
    adminSession,
    ignoreCertificate,
    securityProtocol,
    desktopWidth,
    desktopHeight,
    backend,
    customActions,
  };
  return new RDPDevice(deviceOptions);
}
826
1433
  async function agentForComputer(opts) {
827
1434
  const device = createLocalComputerDevice(opts);
828
1435
  await device.connect();
829
1436
  return new ComputerAgent(device, opts);
830
1437
  }
831
1438
  const agentFromComputer = agentForComputer;
1439
/**
 * Creates an RDP device from the given options, connects it, and wraps it
 * in a ComputerAgent.
 * @param {object} opts - Passed to createRDPComputerDevice() and to the
 *   ComputerAgent constructor.
 * @returns {Promise<ComputerAgent>} Agent bound to the connected RDP device.
 */
async function agentForRDPComputer(opts) {
  const rdpDevice = createRDPComputerDevice(opts);
  await rdpDevice.connect();
  const agent = new ComputerAgent(rdpDevice, opts);
  return agent;
}
832
1444
  function mcp_tools_define_property(obj, key, value) {
833
1445
  if (key in obj) Object.defineProperty(obj, key, {
834
1446
  value: value,
@@ -840,10 +1452,61 @@ function mcp_tools_define_property(obj, key, value) {
840
1452
  return obj;
841
1453
  }
842
1454
  const mcp_tools_debug = getDebug('mcp:computer-tools');
1455
// Values accepted for the `securityProtocol` init argument.
const RDP_SECURITY_PROTOCOLS = ['auto', 'tls', 'nla', 'rdp'];
843
1461
  const computerInitArgShape = {
844
- displayId: z.string().optional().describe('Display ID (from computer_list_displays)'),
845
- headless: z.boolean().optional().describe('Start virtual display via Xvfb (Linux only)')
1462
+ displayId: z.string().optional().describe('Display ID for local mode (from computer_list_displays). Ignored when host is set.'),
1463
+ headless: z.boolean().optional().describe('Start virtual display via Xvfb (Linux local mode only). Ignored when host is set.'),
1464
+ host: z.string().optional().describe('RDP host (FQDN or IP). Set this to switch into RDP mode.'),
1465
+ port: z.number().optional().describe('RDP port (default 3389). Requires host.'),
1466
+ username: z.string().optional().describe('RDP username. Requires host.'),
1467
+ password: z.string().optional().describe('RDP password. Requires host. Prefer setting via environment or a secrets manager.'),
1468
+ domain: z.string().optional().describe('RDP domain. Requires host.'),
1469
+ adminSession: z.boolean().optional().describe('Attach to the RDP admin/console session. Requires host.'),
1470
+ ignoreCertificate: z.boolean().optional().describe('Skip TLS certificate validation. Requires host.'),
1471
+ securityProtocol: z["enum"](RDP_SECURITY_PROTOCOLS).optional().describe('RDP security protocol negotiation (default auto). Requires host.'),
1472
+ desktopWidth: z.number().optional().describe('Remote desktop width in pixels. Requires host.'),
1473
+ desktopHeight: z.number().optional().describe('Remote desktop height in pixels. Requires host.')
846
1474
  };
1475
/**
 * Converts the flat init arguments into a mode-tagged options object.
 * - No arguments (missing or empty object): returns undefined.
 * - `host` is truthy: RDP mode; displayId/headless are dropped and every
 *   other field is carried over.
 * - Otherwise: local mode carrying only displayId and headless.
 * @param {object} [extracted] - Raw init arguments.
 * @returns {object|undefined} Options tagged with `mode: 'rdp'` or
 *   `mode: 'local'`, or undefined when there is nothing to adapt.
 */
function adaptComputerInitArgs(extracted) {
  const hasArgs = Boolean(extracted) && Object.keys(extracted).length > 0;
  if (!hasArgs) {
    return undefined;
  }
  if (!extracted.host) {
    const { displayId, headless } = extracted;
    return { mode: 'local', displayId, headless };
  }
  // Local-only fields are discarded in RDP mode.
  const { displayId: _ignoredDisplayId, headless: _ignoredHeadless, ...rdpFields } = extracted;
  return { mode: 'rdp', ...rdpFields, host: extracted.host };
}
1491
/**
 * Decides whether the given options name an explicit target.
 * True for RDP mode, or when displayId or headless is set to anything other
 * than undefined.
 * @param {object} [opts] - Mode-tagged init options.
 * @returns {boolean} Whether the options name an explicit target.
 */
function shouldRetargetAgent(opts) {
  if (!opts) {
    return false;
  }
  const { mode, displayId, headless } = opts;
  return mode === 'rdp' || displayId !== undefined || headless !== undefined;
}
1496
/**
 * Builds the suffix appended to the "Connected to computer" message.
 * @param {object} [opts] - Mode-tagged init options.
 * @returns {string} ' via RDP (host[:port][ as user])' in RDP mode,
 *   ' (Display: <id>)' for a local display id, else ' (Primary display)'.
 */
function describeConnectTarget(opts) {
  const mode = opts?.mode;
  if (mode === 'rdp') {
    const { host, port, username } = opts;
    const endpoint = port ? `${host}:${port}` : `${host}`;
    const identity = username ? ` as ${username}` : '';
    return ` via RDP (${endpoint}${identity})`;
  }
  const hasLocalDisplay = mode === 'local' && Boolean(opts.displayId);
  return hasLocalDisplay ? ` (Display: ${opts.displayId})` : ' (Primary display)';
}
1505
/**
 * Derives the target label for a CLI report session.
 * @param {object} [opts] - Mode-tagged init options.
 * @returns {string} 'rdp:<host>' in RDP mode, the display id for a local
 *   display, otherwise 'primary'.
 */
function getCliReportSessionTarget(opts) {
  const mode = opts?.mode;
  if (mode === 'rdp') {
    return `rdp:${opts.host}`;
  }
  const hasLocalDisplay = mode === 'local' && Boolean(opts.displayId);
  return hasLocalDisplay ? opts.displayId : 'primary';
}
847
1510
  class ComputerMidsceneTools extends BaseMidsceneTools {
848
1511
  getCliReportSessionName() {
849
1512
  return 'midscene-computer';
@@ -852,9 +1515,7 @@ class ComputerMidsceneTools extends BaseMidsceneTools {
852
1515
  return new ComputerDevice({});
853
1516
  }
854
1517
  async ensureAgent(opts) {
855
- const displayId = opts?.displayId;
856
- const headless = opts?.headless;
857
- if (this.agent && (void 0 !== displayId || void 0 !== headless)) {
1518
+ if (this.agent && shouldRetargetAgent(opts)) {
858
1519
  try {
859
1520
  await this.agent.destroy?.();
860
1521
  } catch (error) {
@@ -863,8 +1524,20 @@ class ComputerMidsceneTools extends BaseMidsceneTools {
863
1524
  this.agent = void 0;
864
1525
  }
865
1526
  if (this.agent) return this.agent;
866
- mcp_tools_debug('Creating Computer agent with displayId:', displayId || 'primary');
867
1527
  const reportOptions = this.readCliReportAgentOptions();
1528
+ if (opts?.mode === 'rdp') {
1529
+ mcp_tools_debug('Creating RDP Computer agent for host:', opts.host);
1530
+ const { mode: _mode, ...rdpFields } = opts;
1531
+ const agent = await agentForRDPComputer({
1532
+ ...rdpFields,
1533
+ ...reportOptions ?? {}
1534
+ });
1535
+ this.agent = agent;
1536
+ return agent;
1537
+ }
1538
+ const displayId = opts?.mode === 'local' ? opts.displayId : void 0;
1539
+ const headless = opts?.mode === 'local' ? opts.headless : void 0;
1540
+ mcp_tools_debug('Creating Computer agent with displayId:', displayId || 'primary');
868
1541
  const agentOpts = {
869
1542
  ...displayId ? {
870
1543
  displayId
@@ -882,12 +1555,12 @@ class ComputerMidsceneTools extends BaseMidsceneTools {
882
1555
  return [
883
1556
  {
884
1557
  name: 'computer_connect',
885
- description: 'Connect to computer desktop. Provide displayId to connect to a specific display (use computer_list_displays to get available IDs). If not provided, uses the primary display.',
1558
+ description: "Connect to a computer desktop. Default (local) mode controls the local machine; pass displayId to target a specific local display (see computer_list_displays). Pass host to switch to RDP mode and connect to a remote Windows desktop via the RDP helper binary. RDP-related options (port/username/password/domain/securityProtocol/ignoreCertificate/adminSession/desktopWidth/desktopHeight) only take effect when host is set.",
886
1559
  schema: this.getAgentInitArgSchema(),
887
1560
  cli: this.getAgentInitArgCliMetadata(),
888
1561
  handler: async (args)=>{
889
1562
  const initArgs = this.extractAgentInitParam(args);
890
- const reportSession = this.createNewCliReportSession(initArgs?.displayId ?? 'primary');
1563
+ const reportSession = this.createNewCliReportSession(getCliReportSessionTarget(initArgs));
891
1564
  this.commitCliReportSession(reportSession);
892
1565
  if (this.agent) {
893
1566
  try {
@@ -903,7 +1576,7 @@ class ComputerMidsceneTools extends BaseMidsceneTools {
903
1576
  content: [
904
1577
  {
905
1578
  type: 'text',
906
- text: `Connected to computer${initArgs?.displayId ? ` (Display: ${initArgs.displayId})` : ' (Primary display)'}`
1579
+ text: `Connected to computer${describeConnectTarget(initArgs)}`
907
1580
  },
908
1581
  ...this.buildScreenshotContent(screenshot)
909
1582
  ]
@@ -941,14 +1614,14 @@ class ComputerMidsceneTools extends BaseMidsceneTools {
941
1614
  cli: {
942
1615
  preferBareKeys: true
943
1616
  },
944
- adapt: (extracted)=>extracted
1617
+ adapt: (extracted)=>adaptComputerInitArgs(extracted)
945
1618
  });
946
1619
  }
947
1620
  }
948
1621
  const tools = new ComputerMidsceneTools();
949
1622
  runToolsCLI(tools, 'midscene-computer', {
950
1623
  stripPrefix: 'computer_',
951
- version: "1.8.0",
1624
+ version: "1.8.1-beta-20260513084557.0",
952
1625
  extraCommands: createReportCliCommands()
953
1626
  }).catch((e)=>{
954
1627
  process.exit(reportCLIError(e));