@midscene/computer 1.2.1-beta-20260112081017.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,96 @@
+ import { z } from '@midscene/core';
+ import { getDebug } from '@midscene/shared/logger';
+ import { BaseMidsceneTools, type ToolDefinition } from '@midscene/shared/mcp';
+ import { type ComputerAgent, agentFromComputer } from './agent';
+ import { ComputerDevice } from './device';
+
+ const debug = getDebug('mcp:computer-tools');
+
+ /**
+  * Computer-specific tools manager
+  * Extends BaseMidsceneTools to provide desktop automation tools
+  */
+ export class ComputerMidsceneTools extends BaseMidsceneTools<ComputerAgent> {
+   protected createTemporaryDevice() {
+     // Create minimal temporary instance
+     return new ComputerDevice({});
+   }
+
+   protected async ensureAgent(displayId?: string): Promise<ComputerAgent> {
+     if (this.agent && displayId) {
+       // If a specific displayId is requested and we have an agent,
+       // destroy it to create a new one with the new display
+       try {
+         await this.agent.destroy?.();
+       } catch (error) {
+         debug('Failed to destroy agent during cleanup:', error);
+       }
+       this.agent = undefined;
+     }
+
+     if (this.agent) {
+       return this.agent;
+     }
+
+     debug('Creating Computer agent with displayId:', displayId || 'primary');
+     const opts = displayId ? { displayId } : undefined;
+     const agent = await agentFromComputer(opts);
+     this.agent = agent;
+     return agent;
+   }
+
+   /**
+    * Provide Computer-specific platform tools
+    */
+   protected preparePlatformTools(): ToolDefinition[] {
+     return [
+       {
+         name: 'computer_connect',
+         description:
+           'Connect to computer desktop. If displayId not provided, uses the primary display.',
+         schema: {
+           displayId: z
+             .string()
+             .optional()
+             .describe('Display ID (from list_displays)'),
+         },
+         handler: async ({ displayId }: { displayId?: string }) => {
+           const agent = await this.ensureAgent(displayId);
+           const screenshot = await agent.interface.screenshotBase64();
+
+           return {
+             content: [
+               {
+                 type: 'text',
+                 text: `Connected to computer${displayId ? ` (Display: ${displayId})` : ' (Primary display)'}`,
+               },
+               ...this.buildScreenshotContent(screenshot),
+             ],
+           };
+         },
+       },
+       {
+         name: 'computer_disconnect',
+         description: 'Disconnect from computer and release resources',
+         schema: {},
+         handler: this.createDisconnectHandler('computer'),
+       },
+       {
+         name: 'computer_list_displays',
+         description: 'List all available displays/monitors',
+         schema: {},
+         handler: async () => {
+           const displays = await ComputerDevice.listDisplays();
+           return {
+             content: [
+               {
+                 type: 'text',
+                 text: `Available displays:\n${displays.map((d) => `- ${d.name} (ID: ${d.id})${d.primary ? ' [PRIMARY]' : ''}`).join('\n')}`,
+               },
+             ],
+           };
+         },
+       },
+     ];
+   }
+ }
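The `computer_connect` handler above is a thin wrapper around `agentFromComputer`. A minimal sketch of the equivalent direct call, assuming `agentFromComputer` is re-exported from the package root (the test files below import it from `../../src`); the `displayId` parameter is optional, exactly as in the handler:

import { agentFromComputer } from '@midscene/computer';

async function connectSketch(displayId?: string) {
  // Mirror the handler: pass { displayId } for a specific monitor, or nothing for the primary display
  const agent = await agentFromComputer(displayId ? { displayId } : undefined);
  // The handler returns this capture as screenshot content
  const screenshot = await agent.interface.screenshotBase64();
  console.log('Connected, base64 screenshot length:', screenshot.length);
  return agent;
}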
@@ -0,0 +1,36 @@
+ declare module '@computer-use/libnut/dist/import_libnut' {
+   interface ScreenSize {
+     width: number;
+     height: number;
+   }
+
+   interface Point {
+     x: number;
+     y: number;
+   }
+
+   type MouseButton = 'left' | 'right' | 'middle';
+   type ToggleState = 'up' | 'down';
+
+   interface LibNut {
+     getScreenSize(): ScreenSize;
+     getMousePos(): Point;
+     moveMouse(x: number, y: number): void;
+     mouseClick(button?: MouseButton, double?: boolean): void;
+     mouseToggle(state: ToggleState, button?: MouseButton): void;
+     scrollMouse(x: number, y: number): void;
+     keyTap(key: string, modifiers?: string[]): void;
+     typeString(text: string): void;
+   }
+
+   export const libnut: LibNut;
+ }
+
+ declare module '@computer-use/libnut' {
+   interface ScreenSize {
+     width: number;
+     height: number;
+   }
+
+   export function getScreenSize(): ScreenSize;
+ }
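These ambient declarations type the native `@computer-use/libnut` binding. A minimal sketch of driving it through the declared surface only (coordinates and text are arbitrary; the native module must be installed and loadable on the current platform):

import { libnut } from '@computer-use/libnut/dist/import_libnut';

// Center the cursor on the primary screen, click, then type a line of text
const { width, height } = libnut.getScreenSize();
libnut.moveMouse(Math.round(width / 2), Math.round(height / 2));
libnut.mouseClick('left');
libnut.typeString('hello from libnut');
libnut.keyTap('enter');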
package/src/utils.ts ADDED
@@ -0,0 +1,51 @@
+ import { ComputerDevice, type DisplayInfo } from './device';
+
+ export interface EnvironmentCheck {
+   available: boolean;
+   error?: string;
+   platform: string;
+   displays: number;
+ }
+
+ /**
+  * Check if the computer environment is available
+  */
+ export async function checkComputerEnvironment(): Promise<EnvironmentCheck> {
+   try {
+     const libnutModule = await import(
+       '@computer-use/libnut/dist/import_libnut'
+     );
+     const libnut = libnutModule.libnut;
+     const screenSize = libnut.getScreenSize();
+     if (!screenSize || screenSize.width <= 0) {
+       return {
+         available: false,
+         error: 'libnut cannot get screen size',
+         platform: process.platform,
+         displays: 0,
+       };
+     }
+
+     const displays = await ComputerDevice.listDisplays();
+     return {
+       available: true,
+       platform: process.platform,
+       displays: displays.length,
+     };
+   } catch (error) {
+     const errorMessage = error instanceof Error ? error.message : String(error);
+     return {
+       available: false,
+       error: errorMessage,
+       platform: process.platform,
+       displays: 0,
+     };
+   }
+ }
+
+ /**
+  * Get all connected displays
+  */
+ export async function getConnectedDisplays(): Promise<DisplayInfo[]> {
+   return ComputerDevice.listDisplays();
+ }
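A minimal sketch of gating automation on the environment probe above. The relative import assumes package-internal usage (as in `src/utils.ts`); whether these helpers are also exposed from the package root is not shown in this diff:

import { checkComputerEnvironment, getConnectedDisplays } from './utils';

async function preflight() {
  const env = await checkComputerEnvironment();
  if (!env.available) {
    // e.g. libnut failed to load, or it reported a zero-sized screen
    throw new Error(`Desktop automation unavailable on ${env.platform}: ${env.error}`);
  }
  const displays = await getConnectedDisplays();
  console.log(`${displays.length} display(s) detected on ${env.platform}`);
}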
@@ -0,0 +1,85 @@
+ import { beforeAll, describe, expect, it, vi } from 'vitest';
+ import { type ComputerAgent, agentFromComputer } from '../../src';
+ import { openBrowserAndNavigate } from './test-utils';
+
+ vi.setConfig({
+   testTimeout: 120 * 1000,
+ });
+
+ const isCacheEnabled = process.env.MIDSCENE_CACHE;
+
+ describe('computer todo app automation', () => {
+   let agent: ComputerAgent;
+
+   beforeAll(async () => {
+     agent = await agentFromComputer({
+       aiActionContext:
+         'You are testing a web application on a desktop browser.',
+     });
+   });
+
+   it(
+     'should automate todo list operations',
+     async () => {
+       if (isCacheEnabled) {
+         vi.setConfig({ testTimeout: 1000 * 1000 });
+       }
+
+       await openBrowserAndNavigate(
+         agent,
+         'https://todomvc.com/examples/react/dist/',
+       );
+
+       // Wait for page to load
+       await agent.aiAssert('The todo input box is visible');
+
+       // Add tasks
+       await agent.aiAct('Enter "Happy Birthday" in the task box');
+       await agent.aiAct(
+         'Enter "Learn JS today" in the task box, then press Enter to create',
+       );
+
+       await agent.aiAct(
+         'Enter "Learn Rust tomorrow" in the task box, then press Enter to create',
+       );
+       await agent.aiAct(
+         'Enter "Learning AI the day after tomorrow" in the task box, then press Enter to create',
+       );
+
+       // Verify tasks were created
+       const allTaskList = await agent.aiQuery<string[]>(
+         'string[], tasks in the list',
+       );
+       console.log('allTaskList', allTaskList);
+       expect(allTaskList).toContain('Learn JS today');
+       expect(allTaskList).toContain('Learn Rust tomorrow');
+       expect(allTaskList).toContain('Learning AI the day after tomorrow');
+
+       // Interact with tasks - hover to show delete button, then click it
+       await agent.aiAct(
+         'Move your mouse over the second item in the task list',
+       );
+       await agent.aiAct(
+         'Click the delete button to the right of the second task',
+       );
+       await agent.aiAct('Click the checkbox next to the second task');
+       await agent.aiAct(
+         'Click the "completed" Status button below the task list',
+       );
+
+       // Verify remaining tasks
+       const taskList = await agent.aiQuery<string[]>(
+         'string[], Extract all task names from the list',
+       );
+       expect(taskList.length).toBe(1);
+       expect(taskList[0]).toBe('Learning AI the day after tomorrow');
+
+       // Verify placeholder text
+       const placeholder = await agent.aiQuery(
+         'string, return the placeholder text in the input box',
+       );
+       expect(placeholder).toBe('What needs to be done?');
+     },
+     360 * 1000,
+   );
+ });
@@ -0,0 +1,56 @@
+ import { sleep } from '@midscene/core/utils';
+ import { beforeAll, describe, it, vi } from 'vitest';
+ import { type ComputerAgent, agentFromComputer } from '../../src';
+ import { openBrowserAndNavigate } from './test-utils';
+
+ vi.setConfig({
+   testTimeout: 120 * 1000,
+ });
+
+ const isCacheEnabled = process.env.MIDSCENE_CACHE;
+
+ describe('computer shop app automation', () => {
+   let agent: ComputerAgent;
+
+   beforeAll(async () => {
+     agent = await agentFromComputer({
+       aiActionContext:
+         'You are testing a web application on a desktop browser.',
+     });
+   });
+
+   it(
+     'should automate shop login and cart operations',
+     async () => {
+       if (isCacheEnabled) {
+         vi.setConfig({ testTimeout: 1000 * 1000 });
+       }
+
+       await openBrowserAndNavigate(agent, 'https://www.saucedemo.com/');
+
+       // Wait for page to load
+       await agent.aiAssert('The login form is visible');
+
+       // Login
+       await agent.aiAct('type "standard_user" in user name input');
+       await agent.aiAct('type "secret_sauce" in password input');
+       await agent.aiAct('click Login Button');
+       await sleep(2000);
+
+       // Check the login success
+       await agent.aiAssert('the page title is "Swag Labs"');
+
+       // Add to cart
+       await agent.aiAct('click "add to cart" for black t-shirt products');
+       await sleep(500);
+
+       // Click cart icon
+       await agent.aiAct('click right top cart icon');
+       await sleep(1000);
+
+       // Verify cart page loaded
+       await agent.aiAssert('The cart page is displayed');
+     },
+     360 * 1000,
+   );
+ });
@@ -0,0 +1,46 @@
+ import { sleep } from '@midscene/core/utils';
+ import { beforeAll, describe, it, vi } from 'vitest';
+ import { ComputerAgent, ComputerDevice } from '../../src';
+
+ vi.setConfig({
+   testTimeout: 120 * 1000,
+ });
+
+ describe('computer basic operations', () => {
+   let agent: ComputerAgent;
+
+   beforeAll(async () => {
+     const device = new ComputerDevice({});
+     agent = new ComputerAgent(device, {
+       aiActionContext:
+         'You are controlling a desktop computer. This is a test environment.',
+     });
+     await device.connect();
+   });
+
+   it(
+     'basic desktop interactions',
+     async () => {
+       // Take screenshot and query screen info
+       const screenInfo = await agent.aiQuery(
+         '{width: number, height: number, hasContent: boolean}, get current screen resolution and check if screen has visible content',
+       );
+       console.log('Screen info:', screenInfo);
+
+       // Move mouse
+       await agent.aiAct('move mouse to the center of the screen');
+       await sleep(500);
+
+       // Verify screen has content
+       await agent.aiAssert('The screen has visible content');
+
+       // Test moving mouse to corners
+       await agent.aiAct('move mouse to the top-left corner of the screen');
+       await sleep(500);
+
+       await agent.aiAct('move mouse to the bottom-right corner of the screen');
+       await sleep(500);
+     },
+     360 * 1000,
+   );
+ });
@@ -0,0 +1,66 @@
+ import { sleep } from '@midscene/core/utils';
+ import { beforeAll, describe, it, vi } from 'vitest';
+ import { ComputerAgent, ComputerDevice } from '../../src';
+
+ vi.setConfig({
+   testTimeout: 120 * 1000,
+ });
+
+ describe('computer keyboard operations', () => {
+   let agent: ComputerAgent;
+
+   beforeAll(async () => {
+     const device = new ComputerDevice({});
+     agent = new ComputerAgent(device, {
+       aiActionContext:
+         'You are testing keyboard operations on a desktop computer.',
+     });
+     await device.connect();
+   });
+
+   it(
+     'keyboard shortcuts test',
+     async () => {
+       const isMac = process.platform === 'darwin';
+
+       // Take screenshot to verify current screen state
+       const initialState = await agent.aiQuery(
+         '{hasVisibleContent: boolean}, check if there is visible content on screen',
+       );
+       console.log('Initial screen state:', initialState);
+
+       // Verify screen has content
+       await agent.aiAssert('The screen has visible content');
+
+       // Test mouse movement
+       await agent.aiAct('move mouse to center of screen');
+       await sleep(300);
+
+       // Test modifier key combinations
+       if (isMac) {
+         // Cmd+Tab to show app switcher
+         await agent.aiAct('press Command+Tab');
+         await sleep(500);
+         // Press Command alone to release and dismiss app switcher
+         await agent.aiAct('press Command');
+         await sleep(300);
+       } else {
+         // Alt+Tab to show app switcher
+         await agent.aiAct('press Alt+Tab');
+         await sleep(500);
+         // Click to dismiss (Alt alone may not work on Windows)
+         await agent.aiAct('click mouse');
+         await sleep(300);
+       }
+
+       // Take screenshot to verify final state
+       const finalState = await agent.aiQuery(
+         '{hasVisibleContent: boolean}, check current screen state',
+       );
+       console.log('Final screen state:', finalState);
+
+       await agent.aiAssert('Screen content is still visible');
+     },
+     360 * 1000,
+   );
+ });
@@ -0,0 +1,76 @@
+ import { sleep } from '@midscene/core/utils';
+ import { describe, it, vi } from 'vitest';
+ import { ComputerDevice, agentFromComputer } from '../../src';
+
+ vi.setConfig({
+   testTimeout: 120 * 1000,
+ });
+
+ describe('computer multi display', () => {
+   it(
+     'connect to multiple displays',
+     async () => {
+       // List all displays
+       const displays = await ComputerDevice.listDisplays();
+       console.log('Available displays:', displays);
+
+       if (displays.length < 2) {
+         console.warn(
+           `Only ${displays.length} display(s) found, need at least 2 for multi-display test`,
+         );
+         // Still test single display
+         if (displays.length === 1) {
+           const agent = await agentFromComputer({
+             displayId: displays[0].id,
+           });
+           await agent.aiAct('move mouse to center of screen');
+           await agent.aiAssert('Screen has visible content');
+         }
+         return;
+       }
+
+       // Connect to first display
+       const display1 = displays[0];
+       console.log(
+         `Connecting to display 1: ${display1.name} (ID: ${display1.id})`,
+       );
+       const agent1 = await agentFromComputer({
+         displayId: display1.id,
+         aiActionContext: `You are controlling display 1: ${display1.name}`,
+       });
+
+       // Operate on first display
+       await agent1.aiAct('move mouse to center of screen');
+       await sleep(500);
+       const screen1Info = await agent1.aiQuery(
+         '{hasContent: boolean}, check if display has visible content',
+       );
+       console.log('Display 1 info:', screen1Info);
+
+       // Connect to second display
+       const display2 = displays[1];
+       console.log(
+         `Connecting to display 2: ${display2.name} (ID: ${display2.id})`,
+       );
+       const agent2 = await agentFromComputer({
+         displayId: display2.id,
+         aiActionContext: `You are controlling display 2: ${display2.name}`,
+       });
+
+       // Operate on second display
+       await agent2.aiAct('move mouse to center of screen');
+       await sleep(500);
+       const screen2Info = await agent2.aiQuery(
+         '{hasContent: boolean}, check if display has visible content',
+       );
+       console.log('Display 2 info:', screen2Info);
+
+       // Verify both displays have content
+       await agent1.aiAssert('This display has visible content');
+       await agent2.aiAssert('This display has visible content');
+
+       console.log('Multi-display test completed successfully');
+     },
+     360 * 1000,
+   );
+ });
@@ -0,0 +1,31 @@
+ import { sleep } from '@midscene/core/utils';
+ import type { ComputerAgent } from '../../src';
+
+ const IS_MAC = process.platform === 'darwin';
+
+ /**
+  * Opens a browser and navigates to the specified URL
+  */
+ export async function openBrowserAndNavigate(
+   agent: ComputerAgent,
+   url: string,
+ ): Promise<void> {
+   if (IS_MAC) {
+     await agent.aiAct('press Cmd+Space');
+     await sleep(500);
+     await agent.aiAct('type "Safari" and press Enter');
+     await sleep(2000);
+     await agent.aiAct('press Cmd+L to focus address bar');
+   } else {
+     await agent.aiAct('press Windows key');
+     await sleep(500);
+     await agent.aiAct('type "Chrome" and press Enter');
+     await sleep(2000);
+     await agent.aiAct('press Ctrl+L to focus address bar');
+   }
+   await sleep(300);
+
+   await agent.aiAct(`type "${url}"`);
+   await agent.aiAct('press Enter');
+   await sleep(3000);
+ }
@@ -0,0 +1,63 @@
+ import { sleep } from '@midscene/core/utils';
+ import { beforeAll, describe, it, vi } from 'vitest';
+ import { ComputerAgent, ComputerDevice } from '../../src';
+
+ vi.setConfig({
+   testTimeout: 240 * 1000,
+ });
+
+ describe('computer web browser automation', () => {
+   let agent: ComputerAgent;
+
+   beforeAll(async () => {
+     const device = new ComputerDevice({});
+     agent = new ComputerAgent(device, {
+       aiActionContext:
+         'You are automating a web browser on a desktop computer. If any popup appears, close it.',
+     });
+     await device.connect();
+   });
+
+   it(
+     'open browser and navigate',
+     async () => {
+       const isMac = process.platform === 'darwin';
+
+       // Open browser (using platform-specific shortcuts)
+       if (isMac) {
+         await agent.aiAct('press Cmd+Space to open Spotlight');
+         await sleep(1000);
+         await agent.aiAct('type "Safari" and press Enter');
+       } else {
+         await agent.aiAct('press Windows key');
+         await sleep(1000);
+         await agent.aiAct('type "Chrome" and press Enter');
+       }
+
+       await sleep(3000);
+
+       // Wait for browser to open
+       await agent.aiWaitFor('Browser window is open');
+
+       // Navigate to website
+       await agent.aiAct('click on address bar and type "example.com"');
+       await sleep(1000);
+       await agent.aiAct('press Enter');
+
+       await sleep(3000);
+
+       // Verify page loaded
+       await agent.aiWaitFor('Page has loaded');
+
+       // Extract page info
+       const pageInfo = await agent.aiQuery(
+         '{title: string, hasContent: boolean}, extract page title and check if content exists',
+       );
+       console.log('Page info:', pageInfo);
+
+       // Assert page content
+       await agent.aiAssert('The page has text content');
+     },
+     720 * 1000,
+   );
+ });
@@ -0,0 +1,34 @@
+ import { describe, expect, it } from 'vitest';
+ import { ComputerAgent, ComputerDevice } from '../../src';
+
+ describe('ComputerAgent', () => {
+   it('should create agent instance', () => {
+     const device = new ComputerDevice({});
+     const agent = new ComputerAgent(device);
+
+     expect(agent).toBeDefined();
+     expect(agent.interface).toBe(device);
+   });
+
+   it('should create agent with options', () => {
+     const device = new ComputerDevice({ displayId: 'test' });
+     const agent = new ComputerAgent(device, {
+       aiActionContext: 'Test context',
+     });
+
+     expect(agent).toBeDefined();
+   });
+
+   it('should create agent with custom actions', () => {
+     const device = new ComputerDevice({
+       customActions: [],
+     });
+     const agent = new ComputerAgent(device);
+
+     expect(agent).toBeDefined();
+     expect(agent.interface).toBeDefined();
+   });
+
+   // Note: Tests that require actual libnut functionality (like agentFromComputer with connect)
+   // should be run as AI tests or integration tests where native modules are available
+ });
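Following the note above, a sketch of how an integration suite could skip itself when the native module cannot load, reusing `checkComputerEnvironment` from `src/utils.ts` (`describe.skipIf` is standard Vitest; the relative import path is an assumption):

import { describe, it } from 'vitest';
import { checkComputerEnvironment } from '../../src/utils';

// Probe once at module load; top-level await is available in Vitest's ESM test files
const env = await checkComputerEnvironment();

describe.skipIf(!env.available)('computer integration (requires libnut)', () => {
  it('runs only where the native binding can load', async () => {
    // place tests that actually drive the desktop here
  });
});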