@midscene/computer 1.4.6 → 1.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/es/cli.mjs CHANGED
@@ -4,12 +4,80 @@ import { getDebug } from "@midscene/shared/logger";
4
4
  import { BaseMidsceneTools } from "@midscene/shared/mcp";
5
5
  import { Agent } from "@midscene/core/agent";
6
6
  import node_assert from "node:assert";
7
- import { execSync } from "node:child_process";
7
+ import { execSync, spawn } from "node:child_process";
8
8
  import { createRequire } from "node:module";
9
9
  import { actionHoverParamSchema, defineAction, defineActionClearInput, defineActionDoubleClick, defineActionDragAndDrop, defineActionKeyboardPress, defineActionRightClick, defineActionScroll, defineActionTap } from "@midscene/core/device";
10
10
  import { sleep } from "@midscene/core/utils";
11
11
  import { createImgBase64ByFormat } from "@midscene/shared/img";
12
12
  import screenshot_desktop from "screenshot-desktop";
13
+ import { existsSync } from "node:fs";
14
+ const debugXvfb = getDebug('computer:xvfb');
15
/**
 * Reports whether an `Xvfb` executable can be resolved through the shell.
 *
 * The lookup uses `command -v`, a POSIX shell builtin, rather than the
 * external `which` utility: `which` is not guaranteed to be installed (some
 * minimal container images omit it), in which case an installed Xvfb would be
 * reported as missing.
 *
 * @returns {boolean} `true` when the lookup exits with status 0, `false` when
 *   it exits non-zero or the command cannot be run.
 */
function checkXvfbInstalled() {
    try {
        execSync('command -v Xvfb', {
            stdio: 'ignore'
        });
        return true;
    } catch {
        // execSync throws on a non-zero exit status or a spawn failure; both
        // mean Xvfb is not usable from here.
        return false;
    }
}
25
/**
 * Returns the first X display number in `[startFrom, startFrom + 100)` that
 * does not appear to be in use.
 *
 * A display is treated as taken when either its lock file
 * (`/tmp/.X<n>-lock`) or its Unix socket (`/tmp/.X11-unix/X<n>`) exists.
 * Checking the socket as well covers X servers that run without a lock file
 * (for example when started with `-nolock`), which a lock-only probe would
 * wrongly report as free.
 *
 * Note: this is a point-in-time probe; another process may still claim the
 * display between this check and the server actually starting.
 *
 * @param {number} [startFrom=99] - first display number to try.
 * @returns {number} a display number with no lock file and no socket.
 * @throws {Error} when all 100 candidates look occupied.
 */
function findAvailableDisplay(startFrom = 99) {
    const limit = startFrom + 100;
    for(let n = startFrom; n < limit; n++){
        const inUse = existsSync(`/tmp/.X${n}-lock`) || existsSync(`/tmp/.X11-unix/X${n}`);
        if (!inUse) return n;
    }
    throw new Error(`No available display number found (checked ${startFrom} to ${startFrom + 99})`);
}
29
/**
 * Spawns an Xvfb server and resolves once the process has survived a short
 * startup grace period.
 *
 * There is no positive readiness signal here: the server is considered
 * started if it has neither failed to spawn nor exited within 500 ms.
 *
 * @param {object} [options]
 * @param {string} [options.resolution='1920x1080x24'] - value passed to
 *   Xvfb's `-screen 0` argument.
 * @param {number} [options.displayNumber] - display number to use; when
 *   omitted, one is picked with `findAvailableDisplay()`.
 * @returns {Promise<{process: object, display: string, stop: Function}>}
 *   resolves with the child process, the display string (`:<n>`) and a
 *   `stop()` helper that sends SIGTERM to the child.
 * @throws {Error} synchronously if no display number can be found; the
 *   returned promise rejects if Xvfb cannot be spawned or exits during the
 *   grace period.
 */
function startXvfb(options) {
    const resolution = options?.resolution || '1920x1080x24';
    const displayNum = options?.displayNumber ?? findAvailableDisplay();
    const display = `:${displayNum}`;
    return new Promise((resolve, reject)=>{
        debugXvfb(`Starting Xvfb on display ${display} with resolution ${resolution}`);
        const xvfbProcess = spawn('Xvfb', [
            display,
            '-screen',
            '0',
            resolution,
            '-ac',
            '-nolisten',
            'tcp'
        ], {
            stdio: 'ignore'
        });
        let settled = false;
        let startupTimer;
        // Single rejection path: settles once and cancels the pending grace
        // timer so a failed start does not keep the event loop alive.
        const fail = (message)=>{
            if (settled) return;
            settled = true;
            clearTimeout(startupTimer);
            reject(new Error(message));
        };
        xvfbProcess.on('error', (err)=>{
            fail(`Failed to start Xvfb: ${err.message}`);
        });
        xvfbProcess.on('exit', (code, signal)=>{
            // `code` is null when the child was terminated by a signal, so
            // report the signal instead of "code null".
            const reason = null === code && signal ? `signal ${signal}` : `code ${code}`;
            fail(`Xvfb exited unexpectedly with ${reason}`);
        });
        const instance = {
            process: xvfbProcess,
            display,
            stop () {
                try {
                    xvfbProcess.kill('SIGTERM');
                } catch  {
                    // Best effort: errors from kill() are deliberately ignored.
                }
            }
        };
        startupTimer = setTimeout(()=>{
            if (!settled) {
                settled = true;
                debugXvfb(`Xvfb started on display ${display}`);
                resolve(instance);
            }
        }, 500);
    });
}
77
/**
 * Decides whether a virtual X display has to be started.
 *
 * @param {*} explicitOpt - the resolved headless setting.
 * @returns {boolean} `true` only when the current platform is Linux and
 *   `explicitOpt` is strictly `true`; `false` in every other case.
 */
function needsXvfb(explicitOpt) {
    const onLinux = process.platform === 'linux';
    return onLinux && explicitOpt === true;
}
13
81
  function _define_property(obj, key, value) {
14
82
  if (key in obj) Object.defineProperty(obj, key, {
15
83
  value: value,
@@ -180,53 +248,74 @@ class ComputerDevice {
180
248
  async connect() {
181
249
  debugDevice('Connecting to computer device');
182
250
  try {
251
+ const headless = this.options?.headless ?? 'true' === process.env.MIDSCENE_COMPUTER_HEADLESS_LINUX;
252
+ if (needsXvfb(headless)) {
253
+ if (!checkXvfbInstalled()) throw new Error('Xvfb is required for headless mode but not installed. Install: sudo apt-get install xvfb');
254
+ this.xvfbInstance = await startXvfb({
255
+ resolution: this.options?.xvfbResolution
256
+ });
257
+ process.env.DISPLAY = this.xvfbInstance.display;
258
+ debugDevice(`Xvfb started on display ${this.xvfbInstance.display}`);
259
+ this.xvfbCleanup = ()=>{
260
+ if (this.xvfbInstance) {
261
+ this.xvfbInstance.stop();
262
+ this.xvfbInstance = void 0;
263
+ }
264
+ };
265
+ process.on('exit', this.xvfbCleanup);
266
+ process.on('SIGINT', this.xvfbCleanup);
267
+ process.on('SIGTERM', this.xvfbCleanup);
268
+ }
183
269
  libnut = await getLibnut();
184
270
  const size = await this.size();
185
271
  const displays = await ComputerDevice.listDisplays();
272
+ const headlessInfo = this.xvfbInstance ? `\nHeadless: true (Xvfb on ${this.xvfbInstance.display})` : '';
186
273
  this.description = `
187
274
  Type: Computer
188
275
  Platform: ${process.platform}
189
276
  Display: ${this.displayId || 'Primary'}
190
277
  Screen Size: ${size.width}x${size.height}
191
- Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ') : 'Unknown'}
278
+ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ') : 'Unknown'}${headlessInfo}
192
279
  `;
193
280
  debugDevice('Computer device connected', this.description);
281
+ await this.healthCheck();
194
282
  } catch (error) {
283
+ if (this.xvfbInstance) {
284
+ this.xvfbInstance.stop();
285
+ this.xvfbInstance = void 0;
286
+ }
195
287
  debugDevice(`Failed to connect: ${error}`);
196
288
  throw new Error(`Unable to connect to computer device: ${error}`);
197
289
  }
198
- await this.healthCheck();
199
290
  }
200
291
  async healthCheck() {
201
292
  console.log('[HealthCheck] Starting health check...');
202
293
  console.log('[HealthCheck] Taking screenshot...');
203
- try {
204
- const base64 = await this.screenshotBase64();
205
- console.log(`[HealthCheck] Screenshot succeeded (length=${base64.length})`);
206
- } catch (error) {
207
- console.error(`[HealthCheck] Screenshot failed: ${error}`);
208
- process.exit(1);
209
- }
294
+ const screenshotTimeout = 15000;
295
+ let timeoutId;
296
+ const timeoutPromise = new Promise((_, reject)=>{
297
+ timeoutId = setTimeout(()=>reject(new Error('Screenshot timed out')), screenshotTimeout);
298
+ });
299
+ const base64 = await Promise.race([
300
+ this.screenshotBase64().finally(()=>clearTimeout(timeoutId)),
301
+ timeoutPromise
302
+ ]);
303
+ console.log(`[HealthCheck] Screenshot succeeded (length=${base64.length})`);
210
304
  console.log('[HealthCheck] Moving mouse...');
211
- try {
212
- node_assert(libnut, 'libnut not initialized');
213
- const startPos = libnut.getMousePos();
214
- console.log(`[HealthCheck] Current mouse position: (${startPos.x}, ${startPos.y})`);
215
- const offsetX = Math.floor(40 * Math.random()) + 10;
216
- const offsetY = Math.floor(40 * Math.random()) + 10;
217
- const targetX = startPos.x + offsetX;
218
- const targetY = startPos.y + offsetY;
219
- console.log(`[HealthCheck] Moving mouse to (${targetX}, ${targetY})...`);
220
- libnut.moveMouse(targetX, targetY);
221
- await sleep(50);
222
- const movedPos = libnut.getMousePos();
223
- console.log(`[HealthCheck] Mouse position after move: (${movedPos.x}, ${movedPos.y})`);
224
- libnut.moveMouse(startPos.x, startPos.y);
225
- console.log(`[HealthCheck] Mouse restored to (${startPos.x}, ${startPos.y})`);
226
- } catch (error) {
227
- console.error(`[HealthCheck] Mouse move failed: ${error}`);
228
- process.exit(1);
229
- }
305
+ node_assert(libnut, 'libnut not initialized');
306
+ const startPos = libnut.getMousePos();
307
+ console.log(`[HealthCheck] Current mouse position: (${startPos.x}, ${startPos.y})`);
308
+ const offsetX = Math.floor(40 * Math.random()) + 10;
309
+ const offsetY = Math.floor(40 * Math.random()) + 10;
310
+ const targetX = startPos.x + offsetX;
311
+ const targetY = startPos.y + offsetY;
312
+ console.log(`[HealthCheck] Moving mouse to (${targetX}, ${targetY})...`);
313
+ libnut.moveMouse(targetX, targetY);
314
+ await sleep(50);
315
+ const movedPos = libnut.getMousePos();
316
+ console.log(`[HealthCheck] Mouse position after move: (${movedPos.x}, ${movedPos.y})`);
317
+ libnut.moveMouse(startPos.x, startPos.y);
318
+ console.log(`[HealthCheck] Mouse restored to (${startPos.x}, ${startPos.y})`);
230
319
  console.log('[HealthCheck] Health check passed');
231
320
  }
232
321
  async screenshotBase64() {
@@ -256,8 +345,7 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
256
345
  const screenSize = libnut.getScreenSize();
257
346
  return {
258
347
  width: screenSize.width,
259
- height: screenSize.height,
260
- dpr: 1
348
+ height: screenSize.height
261
349
  };
262
350
  } catch (error) {
263
351
  debugDevice(`Failed to get screen size: ${error}`);
@@ -522,6 +610,16 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
522
610
  }
523
611
  async destroy() {
524
612
  if (this.destroyed) return;
613
+ if (this.xvfbInstance) {
614
+ this.xvfbInstance.stop();
615
+ this.xvfbInstance = void 0;
616
+ }
617
+ if (this.xvfbCleanup) {
618
+ process.removeListener('exit', this.xvfbCleanup);
619
+ process.removeListener('SIGINT', this.xvfbCleanup);
620
+ process.removeListener('SIGTERM', this.xvfbCleanup);
621
+ this.xvfbCleanup = void 0;
622
+ }
525
623
  this.destroyed = true;
526
624
  debugDevice('Computer device destroyed');
527
625
  }
@@ -534,6 +632,8 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
534
632
  _define_property(this, "displayId", void 0);
535
633
  _define_property(this, "description", void 0);
536
634
  _define_property(this, "destroyed", false);
635
+ _define_property(this, "xvfbInstance", void 0);
636
+ _define_property(this, "xvfbCleanup", void 0);
537
637
  _define_property(this, "useAppleScript", void 0);
538
638
  _define_property(this, "uri", void 0);
539
639
  this.options = options;
@@ -562,7 +662,7 @@ class ComputerMidsceneTools extends BaseMidsceneTools {
562
662
  /**
   * Builds a fresh ComputerDevice from an empty options object.
   * The device is only constructed here; this method does not call connect().
   *
   * @returns {ComputerDevice} the newly constructed device.
   */
  createTemporaryDevice() {
563
663
  return new ComputerDevice({});
564
664
  }
565
- async ensureAgent(displayId) {
665
+ async ensureAgent(displayId, headless) {
566
666
  if (this.agent && displayId) {
567
667
  try {
568
668
  await this.agent.destroy?.();
@@ -573,10 +673,15 @@ class ComputerMidsceneTools extends BaseMidsceneTools {
573
673
  }
574
674
  if (this.agent) return this.agent;
575
675
  debug('Creating Computer agent with displayId:', displayId || 'primary');
576
- const opts = displayId ? {
577
- displayId
578
- } : void 0;
579
- const agent = await agentFromComputer(opts);
676
+ const opts = {
677
+ ...displayId ? {
678
+ displayId
679
+ } : {},
680
+ ...void 0 !== headless ? {
681
+ headless
682
+ } : {}
683
+ };
684
+ const agent = await agentFromComputer(Object.keys(opts).length > 0 ? opts : void 0);
580
685
  this.agent = agent;
581
686
  return agent;
582
687
  }
@@ -586,10 +691,11 @@ class ComputerMidsceneTools extends BaseMidsceneTools {
586
691
  name: 'computer_connect',
587
692
  description: 'Connect to computer desktop. Provide displayId to connect to a specific display (use computer_list_displays to get available IDs). If not provided, uses the primary display.',
588
693
  schema: {
589
- displayId: z.string().optional().describe('Display ID (from computer_list_displays)')
694
+ displayId: z.string().optional().describe('Display ID (from computer_list_displays)'),
695
+ headless: z.boolean().optional().describe('Start virtual display via Xvfb (Linux only)')
590
696
  },
591
- handler: async ({ displayId })=>{
592
- const agent = await this.ensureAgent(displayId);
697
+ handler: async ({ displayId, headless })=>{
698
+ const agent = await this.ensureAgent(displayId, headless);
593
699
  const screenshot = await agent.interface.screenshotBase64();
594
700
  return {
595
701
  content: [
package/dist/es/index.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  import node_assert from "node:assert";
2
- import { execSync } from "node:child_process";
2
+ import { execSync, spawn } from "node:child_process";
3
3
  import { createRequire } from "node:module";
4
4
  import { getMidsceneLocationSchema, z } from "@midscene/core";
5
5
  import { actionHoverParamSchema, defineAction, defineActionClearInput, defineActionDoubleClick, defineActionDragAndDrop, defineActionKeyboardPress, defineActionRightClick, defineActionScroll, defineActionTap } from "@midscene/core/device";
@@ -7,8 +7,76 @@ import { sleep } from "@midscene/core/utils";
7
7
  import { createImgBase64ByFormat } from "@midscene/shared/img";
8
8
  import { getDebug } from "@midscene/shared/logger";
9
9
  import screenshot_desktop from "screenshot-desktop";
10
+ import { existsSync } from "node:fs";
10
11
  import { Agent } from "@midscene/core/agent";
11
12
  import { overrideAIConfig } from "@midscene/shared/env";
13
+ const debugXvfb = getDebug('computer:xvfb');
14
/**
 * Reports whether an `Xvfb` executable can be resolved through the shell.
 *
 * The lookup uses `command -v`, a POSIX shell builtin, rather than the
 * external `which` utility: `which` is not guaranteed to be installed (some
 * minimal container images omit it), in which case an installed Xvfb would be
 * reported as missing.
 *
 * @returns {boolean} `true` when the lookup exits with status 0, `false` when
 *   it exits non-zero or the command cannot be run.
 */
function checkXvfbInstalled() {
    try {
        execSync('command -v Xvfb', {
            stdio: 'ignore'
        });
        return true;
    } catch {
        // execSync throws on a non-zero exit status or a spawn failure; both
        // mean Xvfb is not usable from here.
        return false;
    }
}
24
/**
 * Returns the first X display number in `[startFrom, startFrom + 100)` that
 * does not appear to be in use.
 *
 * A display is treated as taken when either its lock file
 * (`/tmp/.X<n>-lock`) or its Unix socket (`/tmp/.X11-unix/X<n>`) exists.
 * Checking the socket as well covers X servers that run without a lock file
 * (for example when started with `-nolock`), which a lock-only probe would
 * wrongly report as free.
 *
 * Note: this is a point-in-time probe; another process may still claim the
 * display between this check and the server actually starting.
 *
 * @param {number} [startFrom=99] - first display number to try.
 * @returns {number} a display number with no lock file and no socket.
 * @throws {Error} when all 100 candidates look occupied.
 */
function findAvailableDisplay(startFrom = 99) {
    const limit = startFrom + 100;
    for(let n = startFrom; n < limit; n++){
        const inUse = existsSync(`/tmp/.X${n}-lock`) || existsSync(`/tmp/.X11-unix/X${n}`);
        if (!inUse) return n;
    }
    throw new Error(`No available display number found (checked ${startFrom} to ${startFrom + 99})`);
}
28
/**
 * Spawns an Xvfb server and resolves once the process has survived a short
 * startup grace period.
 *
 * There is no positive readiness signal here: the server is considered
 * started if it has neither failed to spawn nor exited within 500 ms.
 *
 * @param {object} [options]
 * @param {string} [options.resolution='1920x1080x24'] - value passed to
 *   Xvfb's `-screen 0` argument.
 * @param {number} [options.displayNumber] - display number to use; when
 *   omitted, one is picked with `findAvailableDisplay()`.
 * @returns {Promise<{process: object, display: string, stop: Function}>}
 *   resolves with the child process, the display string (`:<n>`) and a
 *   `stop()` helper that sends SIGTERM to the child.
 * @throws {Error} synchronously if no display number can be found; the
 *   returned promise rejects if Xvfb cannot be spawned or exits during the
 *   grace period.
 */
function startXvfb(options) {
    const resolution = options?.resolution || '1920x1080x24';
    const displayNum = options?.displayNumber ?? findAvailableDisplay();
    const display = `:${displayNum}`;
    return new Promise((resolve, reject)=>{
        debugXvfb(`Starting Xvfb on display ${display} with resolution ${resolution}`);
        const xvfbProcess = spawn('Xvfb', [
            display,
            '-screen',
            '0',
            resolution,
            '-ac',
            '-nolisten',
            'tcp'
        ], {
            stdio: 'ignore'
        });
        let settled = false;
        let startupTimer;
        // Single rejection path: settles once and cancels the pending grace
        // timer so a failed start does not keep the event loop alive.
        const fail = (message)=>{
            if (settled) return;
            settled = true;
            clearTimeout(startupTimer);
            reject(new Error(message));
        };
        xvfbProcess.on('error', (err)=>{
            fail(`Failed to start Xvfb: ${err.message}`);
        });
        xvfbProcess.on('exit', (code, signal)=>{
            // `code` is null when the child was terminated by a signal, so
            // report the signal instead of "code null".
            const reason = null === code && signal ? `signal ${signal}` : `code ${code}`;
            fail(`Xvfb exited unexpectedly with ${reason}`);
        });
        const instance = {
            process: xvfbProcess,
            display,
            stop () {
                try {
                    xvfbProcess.kill('SIGTERM');
                } catch  {
                    // Best effort: errors from kill() are deliberately ignored.
                }
            }
        };
        startupTimer = setTimeout(()=>{
            if (!settled) {
                settled = true;
                debugXvfb(`Xvfb started on display ${display}`);
                resolve(instance);
            }
        }, 500);
    });
}
76
/**
 * Decides whether a virtual X display has to be started.
 *
 * @param {*} explicitOpt - the resolved headless setting.
 * @returns {boolean} `true` only when the current platform is Linux and
 *   `explicitOpt` is strictly `true`; `false` in every other case.
 */
function needsXvfb(explicitOpt) {
    const onLinux = process.platform === 'linux';
    return onLinux && explicitOpt === true;
}
12
80
  function _define_property(obj, key, value) {
13
81
  if (key in obj) Object.defineProperty(obj, key, {
14
82
  value: value,
@@ -179,53 +247,74 @@ class ComputerDevice {
179
247
  async connect() {
180
248
  debugDevice('Connecting to computer device');
181
249
  try {
250
+ const headless = this.options?.headless ?? 'true' === process.env.MIDSCENE_COMPUTER_HEADLESS_LINUX;
251
+ if (needsXvfb(headless)) {
252
+ if (!checkXvfbInstalled()) throw new Error('Xvfb is required for headless mode but not installed. Install: sudo apt-get install xvfb');
253
+ this.xvfbInstance = await startXvfb({
254
+ resolution: this.options?.xvfbResolution
255
+ });
256
+ process.env.DISPLAY = this.xvfbInstance.display;
257
+ debugDevice(`Xvfb started on display ${this.xvfbInstance.display}`);
258
+ this.xvfbCleanup = ()=>{
259
+ if (this.xvfbInstance) {
260
+ this.xvfbInstance.stop();
261
+ this.xvfbInstance = void 0;
262
+ }
263
+ };
264
+ process.on('exit', this.xvfbCleanup);
265
+ process.on('SIGINT', this.xvfbCleanup);
266
+ process.on('SIGTERM', this.xvfbCleanup);
267
+ }
182
268
  device_libnut = await getLibnut();
183
269
  const size = await this.size();
184
270
  const displays = await ComputerDevice.listDisplays();
271
+ const headlessInfo = this.xvfbInstance ? `\nHeadless: true (Xvfb on ${this.xvfbInstance.display})` : '';
185
272
  this.description = `
186
273
  Type: Computer
187
274
  Platform: ${process.platform}
188
275
  Display: ${this.displayId || 'Primary'}
189
276
  Screen Size: ${size.width}x${size.height}
190
- Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ') : 'Unknown'}
277
+ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ') : 'Unknown'}${headlessInfo}
191
278
  `;
192
279
  debugDevice('Computer device connected', this.description);
280
+ await this.healthCheck();
193
281
  } catch (error) {
282
+ if (this.xvfbInstance) {
283
+ this.xvfbInstance.stop();
284
+ this.xvfbInstance = void 0;
285
+ }
194
286
  debugDevice(`Failed to connect: ${error}`);
195
287
  throw new Error(`Unable to connect to computer device: ${error}`);
196
288
  }
197
- await this.healthCheck();
198
289
  }
199
290
  async healthCheck() {
200
291
  console.log('[HealthCheck] Starting health check...');
201
292
  console.log('[HealthCheck] Taking screenshot...');
202
- try {
203
- const base64 = await this.screenshotBase64();
204
- console.log(`[HealthCheck] Screenshot succeeded (length=${base64.length})`);
205
- } catch (error) {
206
- console.error(`[HealthCheck] Screenshot failed: ${error}`);
207
- process.exit(1);
208
- }
293
+ const screenshotTimeout = 15000;
294
+ let timeoutId;
295
+ const timeoutPromise = new Promise((_, reject)=>{
296
+ timeoutId = setTimeout(()=>reject(new Error('Screenshot timed out')), screenshotTimeout);
297
+ });
298
+ const base64 = await Promise.race([
299
+ this.screenshotBase64().finally(()=>clearTimeout(timeoutId)),
300
+ timeoutPromise
301
+ ]);
302
+ console.log(`[HealthCheck] Screenshot succeeded (length=${base64.length})`);
209
303
  console.log('[HealthCheck] Moving mouse...');
210
- try {
211
- node_assert(device_libnut, 'libnut not initialized');
212
- const startPos = device_libnut.getMousePos();
213
- console.log(`[HealthCheck] Current mouse position: (${startPos.x}, ${startPos.y})`);
214
- const offsetX = Math.floor(40 * Math.random()) + 10;
215
- const offsetY = Math.floor(40 * Math.random()) + 10;
216
- const targetX = startPos.x + offsetX;
217
- const targetY = startPos.y + offsetY;
218
- console.log(`[HealthCheck] Moving mouse to (${targetX}, ${targetY})...`);
219
- device_libnut.moveMouse(targetX, targetY);
220
- await sleep(50);
221
- const movedPos = device_libnut.getMousePos();
222
- console.log(`[HealthCheck] Mouse position after move: (${movedPos.x}, ${movedPos.y})`);
223
- device_libnut.moveMouse(startPos.x, startPos.y);
224
- console.log(`[HealthCheck] Mouse restored to (${startPos.x}, ${startPos.y})`);
225
- } catch (error) {
226
- console.error(`[HealthCheck] Mouse move failed: ${error}`);
227
- process.exit(1);
228
- }
304
+ node_assert(device_libnut, 'libnut not initialized');
305
+ const startPos = device_libnut.getMousePos();
306
+ console.log(`[HealthCheck] Current mouse position: (${startPos.x}, ${startPos.y})`);
307
+ const offsetX = Math.floor(40 * Math.random()) + 10;
308
+ const offsetY = Math.floor(40 * Math.random()) + 10;
309
+ const targetX = startPos.x + offsetX;
310
+ const targetY = startPos.y + offsetY;
311
+ console.log(`[HealthCheck] Moving mouse to (${targetX}, ${targetY})...`);
312
+ device_libnut.moveMouse(targetX, targetY);
313
+ await sleep(50);
314
+ const movedPos = device_libnut.getMousePos();
315
+ console.log(`[HealthCheck] Mouse position after move: (${movedPos.x}, ${movedPos.y})`);
316
+ device_libnut.moveMouse(startPos.x, startPos.y);
317
+ console.log(`[HealthCheck] Mouse restored to (${startPos.x}, ${startPos.y})`);
229
318
  console.log('[HealthCheck] Health check passed');
230
319
  }
231
320
  async screenshotBase64() {
@@ -255,8 +344,7 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
255
344
  const screenSize = device_libnut.getScreenSize();
256
345
  return {
257
346
  width: screenSize.width,
258
- height: screenSize.height,
259
- dpr: 1
347
+ height: screenSize.height
260
348
  };
261
349
  } catch (error) {
262
350
  debugDevice(`Failed to get screen size: ${error}`);
@@ -521,6 +609,16 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
521
609
  }
522
610
  async destroy() {
523
611
  if (this.destroyed) return;
612
+ if (this.xvfbInstance) {
613
+ this.xvfbInstance.stop();
614
+ this.xvfbInstance = void 0;
615
+ }
616
+ if (this.xvfbCleanup) {
617
+ process.removeListener('exit', this.xvfbCleanup);
618
+ process.removeListener('SIGINT', this.xvfbCleanup);
619
+ process.removeListener('SIGTERM', this.xvfbCleanup);
620
+ this.xvfbCleanup = void 0;
621
+ }
524
622
  this.destroyed = true;
525
623
  debugDevice('Computer device destroyed');
526
624
  }
@@ -533,6 +631,8 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
533
631
  _define_property(this, "displayId", void 0);
534
632
  _define_property(this, "description", void 0);
535
633
  _define_property(this, "destroyed", false);
634
+ _define_property(this, "xvfbInstance", void 0);
635
+ _define_property(this, "xvfbCleanup", void 0);
536
636
  _define_property(this, "useAppleScript", void 0);
537
637
  _define_property(this, "uri", void 0);
538
638
  this.options = options;
@@ -621,4 +721,4 @@ async function checkComputerEnvironment() {
621
721
  async function getConnectedDisplays() {
622
722
  return ComputerDevice.listDisplays();
623
723
  }
624
- export { ComputerAgent, ComputerDevice, agentFromComputer, checkAccessibilityPermission, checkComputerEnvironment, getConnectedDisplays, overrideAIConfig };
724
+ export { ComputerAgent, ComputerDevice, agentFromComputer, checkAccessibilityPermission, checkComputerEnvironment, checkXvfbInstalled, getConnectedDisplays, needsXvfb, overrideAIConfig };