@midscene/computer 1.2.1-beta-20260112081017.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024-present Bytedance, Inc. and its affiliates.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,243 @@
1
+ # @midscene/computer
2
+
3
+ Midscene.js Computer Desktop Automation - AI-powered desktop automation for Windows, macOS, and Linux.
4
+
5
+ ## Features
6
+
7
+ - 🖥️ **Desktop Automation**: Control mouse, keyboard, and screen
8
+ - 📸 **Screenshot Capture**: Take screenshots of any display
9
+ - 🖱️ **Mouse Operations**: Click, double-click, right-click, hover, drag & drop
10
+ - ⌨️ **Keyboard Input**: Type text, press keys, shortcuts
11
+ - 📜 **Scroll Operations**: Scroll in any direction
12
+ - 🖼️ **Multi-Display Support**: Work with multiple monitors
13
+ - 🤖 **AI-Powered**: Use natural language to control your desktop
14
+ - 🔌 **MCP Server**: Expose capabilities via Model Context Protocol
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ npm install @midscene/computer
20
+ # or
21
+ pnpm add @midscene/computer
22
+ ```
23
+
24
+ ### Platform Requirements
25
+
26
+ This package uses native modules for desktop control:
27
+ - `screenshot-desktop`: For capturing screenshots
28
+ - `@computer-use/libnut`: For mouse and keyboard control
29
+
30
+ These modules require compilation on installation. Make sure you have the necessary build tools:
31
+
32
+ **macOS**: Install Xcode Command Line Tools
33
+ ```bash
34
+ xcode-select --install
35
+ ```
36
+
37
+ **Linux**: Install build essentials and ImageMagick
38
+ ```bash
39
+ # Ubuntu/Debian
40
+ sudo apt-get install build-essential libx11-dev libxtst-dev libpng-dev imagemagick
41
+
42
+ # Fedora/RHEL
43
+ sudo dnf install gcc-c++ libX11-devel libXtst-devel libpng-devel ImageMagick
44
+
45
+ # Arch
46
+ sudo pacman -S base-devel libx11 libxtst libpng imagemagick
47
+ ```
48
+
49
+ **Note**: ImageMagick is required for screenshot capture on Linux.
50
+
51
+ **Windows**: Install the Visual Studio Build Tools with the "Desktop development with C++" workload, or check "Automatically install the necessary tools" in the Node.js installer. The command below still works on older setups, but note that the `windows-build-tools` npm package is deprecated:
52
+ ```bash
53
+ npm install --global windows-build-tools
54
+ ```
55
+
56
+ ## Quick Start
57
+
58
+ ### Basic Usage
59
+
60
+ ```typescript
61
+ import { agentFromComputer } from '@midscene/computer';
62
+
63
+ // Create an agent
64
+ const agent = await agentFromComputer({
65
+ aiActionContext: 'You are controlling a desktop computer.',
66
+ });
67
+
68
+ // Use AI to perform actions
69
+ await agent.aiAct('move mouse to center of screen');
70
+ await agent.aiAct('click on the desktop');
71
+ await agent.aiAct('type "Hello World"');
72
+
73
+ // Query information
74
+ const screenInfo = await agent.aiQuery(
75
+ '{width: number, height: number}, get screen resolution',
76
+ );
77
+
78
+ // Assert conditions
79
+ await agent.aiAssert('There is a desktop visible');
80
+ ```
81
+
82
+ ### Multi-Display Support
83
+
84
+ ```typescript
85
+ import { ComputerDevice, agentFromComputer } from '@midscene/computer';
86
+
87
+ // List all displays
88
+ const displays = await ComputerDevice.listDisplays();
89
+ console.log('Available displays:', displays);
90
+
91
+ // Connect to a specific display
92
+ const agent = await agentFromComputer({
93
+ displayId: displays[0].id,
94
+ });
95
+ ```
96
+
97
+ ### Environment Check
98
+
99
+ ```typescript
100
+ import { checkComputerEnvironment } from '@midscene/computer';
101
+
102
+ const env = await checkComputerEnvironment();
103
+ console.log('Platform:', env.platform);
104
+ console.log('Available:', env.available);
105
+ console.log('Displays:', env.displays);
106
+ ```
107
+
108
+ ## Available Actions
109
+
110
+ The ComputerDevice supports the following actions:
111
+
112
+ - **Tap**: Single click at element center
113
+ - **DoubleClick**: Double click at element center
114
+ - **RightClick**: Right click at element center
115
+ - **Hover**: Move mouse to element center
116
+ - **Input**: Type text with different modes (replace/clear/append)
117
+ - **Scroll**: Scroll in any direction (up/down/left/right)
118
+ - **KeyboardPress**: Press keyboard keys with modifiers
119
+ - **DragAndDrop**: Drag from one element to another
120
+ - **ClearInput**: Clear input field content
121
+ - **ListDisplays**: Get all available displays
122
+
123
+ ## Platform-Specific Shortcuts
124
+
125
+ ### macOS
126
+ - Modifier key: `Cmd` (Command)
127
+ - Open search: `Cmd+Space`
128
+ - Select all: `Cmd+A`
129
+ - Copy: `Cmd+C`
130
+ - Paste: `Cmd+V`
131
+
132
+ ### Windows/Linux
133
+ - Modifier key: `Ctrl` (Control)
134
+ - Open search: `Windows key` or `Super key`
135
+ - Select all: `Ctrl+A`
136
+ - Copy: `Ctrl+C`
137
+ - Paste: `Ctrl+V`
138
+
139
+ ## Testing
140
+
141
+ ### Run Unit Tests
142
+
143
+ ```bash
144
+ pnpm test
145
+ ```
146
+
147
+ ### Run AI Tests
148
+
149
+ ```bash
150
+ # Set AI_TEST_TYPE environment variable
151
+ AI_TEST_TYPE=computer pnpm test:ai
152
+ ```
153
+
154
+ Available AI tests:
155
+ - `basic.test.ts`: Basic desktop interactions
156
+ - `multi-display.test.ts`: Multi-display support
157
+ - `web-browser.test.ts`: Browser automation
158
+ - `text-editor.test.ts`: Text editor operations
159
+
160
+ ## MCP Server
161
+
162
+ Start the MCP server for AI assistant integration:
163
+
164
+ ```typescript
165
+ import { mcpServerForAgent } from '@midscene/computer/mcp-server';
166
+ import { agentFromComputer } from '@midscene/computer';
167
+
168
+ const agent = await agentFromComputer();
169
+ const { server } = mcpServerForAgent(agent);
170
+ await server.launch();
171
+ ```
172
+
173
+ Available MCP tools:
174
+ - `computer_connect`: Connect to desktop display
175
+ - `computer_list_displays`: List all available displays
176
+ - Plus all standard Midscene tools (aiAct, aiQuery, aiAssert, etc.)
177
+
178
+ ## Architecture
179
+
180
+ This package follows the same architecture pattern as `@midscene/android` and `@midscene/ios`:
181
+
182
+ ```
183
+ packages/computer/
184
+ ├── src/
185
+ │ ├── device.ts # ComputerDevice - core device implementation
186
+ │ ├── agent.ts # ComputerAgent - agent wrapper
187
+ │ ├── utils.ts # Utility functions
188
+ │ ├── mcp-server.ts # MCP server
189
+ │ └── mcp-tools.ts # MCP tools definitions
190
+ ├── tests/
191
+ │ ├── unit-test/ # Unit tests (no native dependencies)
192
+ │ └── ai/ # AI-powered integration tests
193
+ └── README.md
194
+ ```
195
+
196
+ ## API Reference
197
+
198
+ ### ComputerDevice
199
+
200
+ ```typescript
201
+ class ComputerDevice implements AbstractInterface {
202
+ constructor(options?: ComputerDeviceOpt);
203
+
204
+ static listDisplays(): Promise<DisplayInfo[]>;
205
+
206
+ async connect(): Promise<void>;
207
+ async screenshotBase64(): Promise<string>;
208
+ async size(): Promise<Size>;
209
+ actionSpace(): DeviceAction<any>[];
210
+ async destroy(): Promise<void>;
211
+ }
212
+ ```
213
+
214
+ ### ComputerAgent
215
+
216
+ ```typescript
217
+ class ComputerAgent extends PageAgent<ComputerDevice> {
218
+ // Inherits all PageAgent methods
219
+ async aiAct(action: string): Promise<void>;
220
+ async aiQuery(query: string): Promise<any>;
221
+ async aiAssert(assertion: string): Promise<void>;
222
+ async aiWaitFor(condition: string): Promise<void>;
223
+ }
224
+ ```
225
+
226
+ ### Factory Functions
227
+
228
+ ```typescript
229
+ async function agentFromComputer(
230
+ opts?: ComputerAgentOpt
231
+ ): Promise<ComputerAgent>;
232
+
233
+ async function checkComputerEnvironment(): Promise<EnvironmentCheck>;
234
+ async function getConnectedDisplays(): Promise<DisplayInfo[]>;
235
+ ```
236
+
237
+ ## License
238
+
239
+ MIT
240
+
241
+ ## Contributing
242
+
243
+ See the main [Midscene.js repository](https://github.com/web-infra-dev/midscene) for contributing guidelines.
@@ -0,0 +1,438 @@
1
+ import node_assert from "node:assert";
2
+ import { getMidsceneLocationSchema, z } from "@midscene/core";
3
+ import { defineAction, defineActionClearInput, defineActionDoubleClick, defineActionDragAndDrop, defineActionHover, defineActionKeyboardPress, defineActionRightClick, defineActionScroll, defineActionTap } from "@midscene/core/device";
4
+ import { sleep } from "@midscene/core/utils";
5
+ import { createImgBase64ByFormat } from "@midscene/shared/img";
6
+ import { getDebug } from "@midscene/shared/logger";
7
+ import screenshot_desktop from "screenshot-desktop";
8
+ import { Agent } from "@midscene/core/agent";
9
+ import { overrideAIConfig } from "@midscene/shared/env";
10
/**
 * Transpiler helper for class-field assignment semantics.
 * Defines `key` on `obj` as an enumerable/configurable/writable own
 * property when the key already exists anywhere on the prototype chain;
 * otherwise falls back to a plain assignment.
 * @returns {object} the same `obj`, for chaining.
 */
function _define_property(obj, key, value) {
  if (key in obj) {
    Object.defineProperty(obj, key, {
      value,
      enumerable: true,
      configurable: true,
      writable: true,
    });
    return obj;
  }
  obj[key] = value;
  return obj;
}
20
// --- Mouse-movement smoothing (interpolation step counts and per-step delays in ms) ---
const SMOOTH_MOVE_STEPS_TAP = 8; // steps used when gliding toward a tap target
const SMOOTH_MOVE_STEPS_HOVER = 10; // steps used when gliding toward a hover target
const SMOOTH_MOVE_DELAY_TAP = 8; // ms between move steps for taps
const SMOOTH_MOVE_DELAY_HOVER = 10; // ms between move steps for hovers
// --- Interaction timing (ms) ---
const HOVER_EFFECT_WAIT = 300; // pause after hover so UI hover effects can show
const CLICK_HOLD_DURATION = 50; // how long the left button stays down during a tap
const INPUT_FOCUS_DELAY = 300; // pause after clicking a field before select-all/clear
const INPUT_CLEAR_DELAY = 150; // pause after clearing a field before typing
// --- Scrolling (ms / tick counts) ---
const SCROLL_REPEAT_COUNT = 10; // repeated ticks used by scroll-to-edge actions
const SCROLL_STEP_DELAY = 100; // delay between scroll-to-edge ticks
const SCROLL_COMPLETE_DELAY = 500; // settle time after a single scroll action
// Lazily loaded @computer-use/libnut module, plus the cached first load
// failure (so a broken native install is not re-imported on every call).
let device_libnut = null;
let libnutLoadError = null;
33
/**
 * Lazily import the @computer-use/libnut native module and cache it.
 * On failure, caches the *wrapped* error so every subsequent call throws
 * the same descriptive Error (the original cached the raw import error,
 * so retries threw a different, unwrapped message than the first call).
 * @returns {Promise<object>} the libnut API object.
 * @throws {Error} when the native module cannot be loaded; `cause` holds
 *   the underlying import/initialization error.
 */
async function getLibnut() {
  if (device_libnut) return device_libnut;
  if (libnutLoadError) throw libnutLoadError;
  try {
    const libnutModule = await import("@computer-use/libnut/dist/import_libnut");
    device_libnut = libnutModule.libnut;
    if (!device_libnut) throw new Error('libnut module loaded but libnut object is undefined');
    return device_libnut;
  } catch (error) {
    // Cache the wrapped error so repeat callers see a consistent message.
    libnutLoadError = new Error(
      `Failed to load @computer-use/libnut. Make sure it is properly installed and compiled for your platform. Error: ${error}`,
      { cause: error },
    );
    throw libnutLoadError;
  }
}
46
// Debug logger scoped to the 'computer:device' namespace.
const debugDevice = getDebug('computer:device');
47
/**
 * Glide the cursor from its current position to (targetX, targetY) in
 * `steps` evenly spaced linear increments, sleeping `stepDelay` ms
 * between increments. Requires libnut to be loaded already.
 * @param {number} targetX - destination x in screen pixels.
 * @param {number} targetY - destination y in screen pixels.
 * @param {number} steps - number of interpolation steps (>= 1).
 * @param {number} stepDelay - delay in ms after each step.
 */
async function smoothMoveMouse(targetX, targetY, steps, stepDelay) {
  node_assert(device_libnut, 'libnut not initialized');
  const start = device_libnut.getMousePos();
  const deltaX = targetX - start.x;
  const deltaY = targetY - start.y;
  for (let step = 1; step <= steps; step++) {
    const nextX = Math.round(start.x + (deltaX * step) / steps);
    const nextY = Math.round(start.y + (deltaY * step) / steps);
    device_libnut.moveMouse(nextX, nextY);
    await sleep(stepDelay);
  }
}
57
// Maps common key-name aliases (lowercased) to libnut's canonical key names.
// Used for modifiers and for any key not matched by PRIMARY_KEY_MAP.
const KEY_NAME_MAP = {
  // OS / editing / navigation keys
  windows: 'win',
  win: 'win',
  ctrl: 'control',
  esc: 'escape',
  del: 'delete',
  ins: 'insert',
  pgup: 'pageup',
  pgdn: 'pagedown',
  // DOM KeyboardEvent-style arrow names -> libnut names
  arrowup: 'up',
  arrowdown: 'down',
  arrowleft: 'left',
  arrowright: 'right',
  // Media / volume keys
  volumedown: 'audio_vol_down',
  volumeup: 'audio_vol_up',
  mediavolumedown: 'audio_vol_down',
  mediavolumeup: 'audio_vol_up',
  mute: 'audio_mute',
  mediamute: 'audio_mute',
  mediaplay: 'audio_play',
  mediapause: 'audio_pause',
  // NOTE(review): the play/pause toggle is mapped to 'audio_play' only —
  // confirm libnut has no dedicated play/pause toggle key.
  mediaplaypause: 'audio_play',
  mediastop: 'audio_stop',
  medianexttrack: 'audio_next',
  mediaprevioustrack: 'audio_prev',
  medianext: 'audio_next',
  mediaprev: 'audio_prev'
};
85
// Maps modifier-key aliases to libnut names, consulted first when the
// *final* key of a shortcut is itself a modifier (e.g. pressing just "Cmd").
const PRIMARY_KEY_MAP = {
  command: 'cmd',
  cmd: 'cmd',
  meta: 'meta',
  control: 'control',
  ctrl: 'control',
  shift: 'shift',
  alt: 'alt',
  option: 'alt' // macOS naming for alt
};
95
/**
 * Normalize a user-supplied key name to libnut's canonical form.
 * Matching is case-insensitive; unknown names pass through lowercased.
 */
function normalizeKeyName(key) {
  const lowered = key.toLowerCase();
  return KEY_NAME_MAP[lowered] || lowered;
}
99
/**
 * Normalize the final (primary) key of a shortcut. Modifier aliases are
 * resolved through PRIMARY_KEY_MAP first (so a bare "Cmd" press works),
 * then through the general KEY_NAME_MAP; anything else is lowercased.
 */
function normalizePrimaryKey(key) {
  const lowered = key.toLowerCase();
  const asModifier = PRIMARY_KEY_MAP[lowered];
  if (asModifier) return asModifier;
  return KEY_NAME_MAP[lowered] || lowered;
}
104
/**
 * Desktop device driver. Captures the screen via `screenshot-desktop`
 * and drives mouse/keyboard through the lazily loaded
 * `@computer-use/libnut` native module (see getLibnut()).
 * Exposes connect / screenshotBase64 / size / actionSpace / destroy,
 * the interface consumed by the core Agent.
 */
class ComputerDevice {
  // Human-readable summary; populated by connect(), generic fallback before.
  describe() {
    return this.description || 'Computer Device';
  }
  /**
   * Enumerate attached displays as `{ id, name, primary }` records.
   * Best-effort: logs and returns [] on failure instead of throwing.
   */
  static async listDisplays() {
    try {
      const displays = await screenshot_desktop.listDisplays();
      return displays.map((d) => ({
        id: String(d.id),
        name: d.name || `Display ${d.id}`,
        primary: d.primary || false
      }));
    } catch (error) {
      debugDevice(`Failed to list displays: ${error}`);
      return [];
    }
  }
  /**
   * Load libnut (populating the module-level device_libnut cache used by
   * the action handlers), probe the screen, and build the description.
   * @throws {Error} wrapping any failure to load libnut or read the screen.
   */
  async connect() {
    debugDevice('Connecting to computer device');
    try {
      device_libnut = await getLibnut();
      const size = await this.size();
      const displays = await ComputerDevice.listDisplays();
      this.description = `
Type: Computer
Platform: ${process.platform}
Display: ${this.displayId || 'Primary'}
Screen Size: ${size.width}x${size.height}
Available Displays: ${displays.length > 0 ? displays.map((d) => d.name).join(', ') : 'Unknown'}
`;
      debugDevice('Computer device connected', this.description);
    } catch (error) {
      debugDevice(`Failed to connect: ${error}`);
      throw new Error(`Unable to connect to computer device: ${error}`);
    }
  }
  /**
   * Capture the configured display as a base64 data URL (PNG).
   * @throws {Error} wrapping any screenshot-desktop failure.
   */
  async screenshotBase64() {
    debugDevice('Taking screenshot', {
      displayId: this.displayId
    });
    try {
      const options = {
        format: 'png'
      };
      // On macOS screenshot-desktop expects a numeric screen index, so the
      // displayId is coerced (and silently dropped when non-numeric); other
      // platforms receive it as-is. Note the `else` binds to the inner `if`.
      if (void 0 !== this.displayId) if ('darwin' === process.platform) {
        const screenIndex = Number(this.displayId);
        if (!Number.isNaN(screenIndex)) options.screen = screenIndex;
      } else options.screen = this.displayId;
      debugDevice('Screenshot options', options);
      const buffer = await screenshot_desktop(options);
      return createImgBase64ByFormat('png', buffer.toString('base64'));
    } catch (error) {
      debugDevice(`Screenshot failed: ${error}`);
      throw new Error(`Failed to take screenshot: ${error}`);
    }
  }
  /**
   * Screen dimensions from libnut.
   * dpr is hard-coded to 1 — assumes no HiDPI scaling correction is
   * needed here; TODO(review): confirm for Retina/scaled displays.
   * @throws {Error} if libnut cannot report a screen size.
   */
  async size() {
    node_assert(device_libnut, 'libnut not initialized');
    try {
      const screenSize = device_libnut.getScreenSize();
      return {
        width: screenSize.width,
        height: screenSize.height,
        dpr: 1
      };
    } catch (error) {
      debugDevice(`Failed to get screen size: ${error}`);
      throw new Error(`Failed to get screen size: ${error}`);
    }
  }
  /**
   * Build the list of actions the agent may invoke: the default
   * element-targeted actions, platform-level actions (ListDisplays),
   * then any caller-supplied customActions.
   */
  actionSpace() {
    const defaultActions = [
      // Tap: smooth-move to the element center, then press-hold-release
      // the left button (explicit toggle rather than mouseClick, so the
      // press lasts CLICK_HOLD_DURATION ms).
      defineActionTap(async (param) => {
        node_assert(device_libnut, 'libnut not initialized');
        const element = param.locate;
        node_assert(element, 'Element not found, cannot tap');
        const [x, y] = element.center;
        const targetX = Math.round(x);
        const targetY = Math.round(y);
        await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_TAP, SMOOTH_MOVE_DELAY_TAP);
        device_libnut.mouseToggle('down', 'left');
        await sleep(CLICK_HOLD_DURATION);
        device_libnut.mouseToggle('up', 'left');
      }),
      // DoubleClick: instant move (no smoothing), libnut double-click flag.
      defineActionDoubleClick(async (param) => {
        node_assert(device_libnut, 'libnut not initialized');
        const element = param.locate;
        node_assert(element, 'Element not found, cannot double click');
        const [x, y] = element.center;
        device_libnut.moveMouse(Math.round(x), Math.round(y));
        device_libnut.mouseClick('left', true);
      }),
      // RightClick: instant move, single right-button click.
      defineActionRightClick(async (param) => {
        node_assert(device_libnut, 'libnut not initialized');
        const element = param.locate;
        node_assert(element, 'Element not found, cannot right click');
        const [x, y] = element.center;
        device_libnut.moveMouse(Math.round(x), Math.round(y));
        device_libnut.mouseClick('right');
      }),
      // Hover: smooth-move to center, then pause so hover effects render.
      defineActionHover(async (param) => {
        node_assert(device_libnut, 'libnut not initialized');
        const element = param.locate;
        node_assert(element, 'Element not found, cannot hover');
        const [x, y] = element.center;
        const targetX = Math.round(x);
        const targetY = Math.round(y);
        await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_HOVER, SMOOTH_MOVE_DELAY_HOVER);
        await sleep(HOVER_EFFECT_WAIT);
      }),
      // Input: optionally focus + select-all + delete, then type the value.
      defineAction({
        name: 'Input',
        description: 'Input text into the input field',
        interfaceAlias: 'aiInput',
        paramSchema: z.object({
          value: z.string().describe('The text to input'),
          // z["enum"] (bracket access) is a transpiler artifact; same as z.enum.
          mode: z["enum"]([
            'replace',
            'clear',
            'append'
          ]).default('replace').optional().describe('Input mode: replace, clear, or append'),
          locate: getMidsceneLocationSchema().describe('The input field to be filled').optional()
        }),
        call: async (param) => {
          node_assert(device_libnut, 'libnut not initialized');
          const element = param.locate;
          // For replace/clear with a located element: click to focus,
          // select all (Cmd/Ctrl+A), then backspace to empty the field.
          // NOTE(review): mode 'clear' with no located element clears
          // nothing — confirm that is intended.
          if (element && 'append' !== param.mode) {
            const [x, y] = element.center;
            device_libnut.moveMouse(Math.round(x), Math.round(y));
            device_libnut.mouseClick('left');
            await sleep(INPUT_FOCUS_DELAY);
            const modifier = 'darwin' === process.platform ? 'command' : 'control';
            device_libnut.keyTap('a', [
              modifier
            ]);
            await sleep(50);
            device_libnut.keyTap('backspace');
            await sleep(INPUT_CLEAR_DELAY);
          }
          if ('clear' === param.mode) return;
          if (!param.value) return;
          device_libnut.typeString(param.value);
        }
      }),
      // Scroll: optionally position over the element first, then either
      // repeat-scroll to an edge or perform one sized scroll action.
      defineActionScroll(async (param) => {
        node_assert(device_libnut, 'libnut not initialized');
        if (param.locate) {
          const element = param.locate;
          const [x, y] = element.center;
          device_libnut.moveMouse(Math.round(x), Math.round(y));
        }
        const scrollType = param?.scrollType;
        // Per this mapping, positive dy scrolls toward the top.
        const scrollToEdgeActions = {
          scrollToTop: [
            0,
            10
          ],
          scrollToBottom: [
            0,
            -10
          ],
          scrollToLeft: [
            -10,
            0
          ],
          scrollToRight: [
            10,
            0
          ]
        };
        const edgeAction = scrollToEdgeActions[scrollType || ''];
        if (edgeAction) {
          // "Scroll to edge" is approximated by repeating a large tick.
          const [dx, dy] = edgeAction;
          for (let i = 0; i < SCROLL_REPEAT_COUNT; i++) {
            device_libnut.scrollMouse(dx, dy);
            await sleep(SCROLL_STEP_DELAY);
          }
          return;
        }
        if ('singleAction' === scrollType || !scrollType) {
          // One scroll of ~`distance` px, converted to ticks at 100 px/tick.
          const distance = param?.distance || 500;
          const ticks = Math.ceil(distance / 100);
          const direction = param?.direction || 'down';
          const directionMap = {
            up: [
              0,
              ticks
            ],
            down: [
              0,
              -ticks
            ],
            left: [
              -ticks,
              0
            ],
            right: [
              ticks,
              0
            ]
          };
          // Unknown direction falls back to scrolling down.
          const [dx, dy] = directionMap[direction] || [
            0,
            -ticks
          ];
          device_libnut.scrollMouse(dx, dy);
          await sleep(SCROLL_COMPLETE_DELAY);
          return;
        }
        throw new Error(`Unknown scroll type: ${scrollType}, param: ${JSON.stringify(param)}`);
      }),
      // KeyboardPress: optionally click to focus, then tap "Mod+...+Key".
      defineActionKeyboardPress(async (param) => {
        node_assert(device_libnut, 'libnut not initialized');
        if (param.locate) {
          const [x, y] = param.locate.center;
          device_libnut.moveMouse(Math.round(x), Math.round(y));
          device_libnut.mouseClick('left');
          await sleep(50);
        }
        // "Ctrl+Shift+P" -> modifiers ['control','shift'], key 'p'.
        // The final key goes through normalizePrimaryKey so a standalone
        // modifier press (e.g. just "Cmd") still resolves.
        const keys = param.keyName.split('+');
        const modifiers = keys.slice(0, -1).map(normalizeKeyName);
        const key = normalizePrimaryKey(keys[keys.length - 1]);
        debugDevice('KeyboardPress', {
          original: param.keyName,
          key,
          modifiers
        });
        if (modifiers.length > 0) device_libnut.keyTap(key, modifiers);
        else device_libnut.keyTap(key);
      }),
      // DragAndDrop: press at `from`, move to `to`, release (100 ms pauses
      // around the move so the target app registers the drag).
      defineActionDragAndDrop(async (param) => {
        node_assert(device_libnut, 'libnut not initialized');
        const from = param.from;
        const to = param.to;
        node_assert(from, 'missing "from" param for drag and drop');
        node_assert(to, 'missing "to" param for drag and drop');
        const [fromX, fromY] = from.center;
        const [toX, toY] = to.center;
        device_libnut.moveMouse(Math.round(fromX), Math.round(fromY));
        device_libnut.mouseToggle('down', 'left');
        await sleep(100);
        device_libnut.moveMouse(Math.round(toX), Math.round(toY));
        await sleep(100);
        device_libnut.mouseToggle('up', 'left');
      }),
      // ClearInput: click to focus, select all (Cmd/Ctrl+A), backspace.
      defineActionClearInput(async (param) => {
        node_assert(device_libnut, 'libnut not initialized');
        const element = param.locate;
        node_assert(element, 'Element not found, cannot clear input');
        const [x, y] = element.center;
        device_libnut.moveMouse(Math.round(x), Math.round(y));
        device_libnut.mouseClick('left');
        await sleep(100);
        const modifier = 'darwin' === process.platform ? 'command' : 'control';
        device_libnut.keyTap('a', [
          modifier
        ]);
        device_libnut.keyTap('backspace');
        await sleep(50);
      })
    ];
    const platformActions = Object.values(createPlatformActions());
    const customActions = this.options?.customActions || [];
    return [
      ...defaultActions,
      ...platformActions,
      ...customActions
    ];
  }
  // Idempotent teardown; no native resources are released here.
  async destroy() {
    if (this.destroyed) return;
    this.destroyed = true;
    debugDevice('Computer device destroyed');
  }
  // Desktop has no URL concept; always returns an empty string.
  async url() {
    return '';
  }
  /**
   * @param options optional device options; `displayId` selects the
   *   display, `customActions` extends actionSpace(). Fields are set via
   *   _define_property (transpiled class fields). `uri` is declared but
   *   never assigned within this file.
   */
  constructor(options) {
    _define_property(this, "interfaceType", 'computer');
    _define_property(this, "options", void 0);
    _define_property(this, "displayId", void 0);
    _define_property(this, "description", void 0);
    _define_property(this, "destroyed", false);
    _define_property(this, "uri", void 0);
    this.options = options;
    this.displayId = options?.displayId;
  }
}
392
/**
 * Build the platform-level (non-element-targeted) actions exposed by the
 * device. Currently only ListDisplays, which enumerates attached monitors.
 * @returns {{ListDisplays: object}} keyed map of defined actions.
 */
function createPlatformActions() {
  const listDisplays = defineAction({
    name: 'ListDisplays',
    description: 'List all available displays/monitors',
    call: async () => {
      return await ComputerDevice.listDisplays();
    }
  });
  return {
    ListDisplays: listDisplays
  };
}
401
// Desktop-flavored agent type; currently adds no behavior beyond the
// core Agent it extends.
class ComputerAgent extends Agent {
}
403
/**
 * Factory: create a ComputerAgent backed by a freshly connected
 * ComputerDevice.
 * @param opts optional options object; used to construct the device
 *   (e.g. displayId) and forwarded unchanged to the agent.
 * @returns {Promise<ComputerAgent>}
 * @throws whatever ComputerDevice.connect() throws on failure.
 */
async function agentFromComputer(opts) {
  const computerDevice = new ComputerDevice(opts || {});
  await computerDevice.connect();
  return new ComputerAgent(computerDevice, opts);
}
408
/**
 * Probe whether desktop automation is usable on this machine: load
 * libnut, validate the reported screen size, and count displays.
 * Never throws — failures are reported as { available: false, error }.
 * @returns {Promise<{available: boolean, platform: string, displays: number, error?: string}>}
 */
async function checkComputerEnvironment() {
  try {
    const libnutModule = await import("@computer-use/libnut/dist/import_libnut");
    const libnut = libnutModule.libnut;
    // Guard explicitly so a missing export yields a clear message rather
    // than a TypeError from calling into undefined.
    if (!libnut) return {
      available: false,
      error: 'libnut module loaded but libnut object is undefined',
      platform: process.platform,
      displays: 0
    };
    const screenSize = libnut.getScreenSize();
    // A usable screen must report positive width AND height (the previous
    // check only validated width).
    if (!screenSize || screenSize.width <= 0 || screenSize.height <= 0) return {
      available: false,
      error: 'libnut cannot get screen size',
      platform: process.platform,
      displays: 0
    };
    const displays = await ComputerDevice.listDisplays();
    return {
      available: true,
      platform: process.platform,
      displays: displays.length
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    return {
      available: false,
      error: errorMessage,
      platform: process.platform,
      displays: 0
    };
  }
}
435
/**
 * Convenience wrapper: enumerate all currently attached displays.
 * @returns {Promise<Array<{id: string, name: string, primary: boolean}>>}
 */
async function getConnectedDisplays() {
  const displays = await ComputerDevice.listDisplays();
  return displays;
}
438
+ export { ComputerAgent, ComputerDevice, agentFromComputer, checkComputerEnvironment, getConnectedDisplays, overrideAIConfig };