@midscene/computer 1.4.6 → 1.4.7-beta-20260226072540.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/es/cli.mjs CHANGED
@@ -4,12 +4,80 @@ import { getDebug } from "@midscene/shared/logger";
4
4
  import { BaseMidsceneTools } from "@midscene/shared/mcp";
5
5
  import { Agent } from "@midscene/core/agent";
6
6
  import node_assert from "node:assert";
7
- import { execSync } from "node:child_process";
7
+ import { execSync, spawn } from "node:child_process";
8
8
  import { createRequire } from "node:module";
9
9
  import { actionHoverParamSchema, defineAction, defineActionClearInput, defineActionDoubleClick, defineActionDragAndDrop, defineActionKeyboardPress, defineActionRightClick, defineActionScroll, defineActionTap } from "@midscene/core/device";
10
10
  import { sleep } from "@midscene/core/utils";
11
11
  import { createImgBase64ByFormat } from "@midscene/shared/img";
12
12
  import screenshot_desktop from "screenshot-desktop";
13
+ import { existsSync } from "node:fs";
14
+ const debugXvfb = getDebug('computer:xvfb');
15
+ function checkXvfbInstalled() {
16
+ try {
17
+ execSync('which Xvfb', {
18
+ stdio: 'ignore'
19
+ });
20
+ return true;
21
+ } catch {
22
+ return false;
23
+ }
24
+ }
25
+ function findAvailableDisplay(startFrom = 99) {
26
+ for(let n = startFrom; n < startFrom + 100; n++)if (!existsSync(`/tmp/.X${n}-lock`)) return n;
27
+ throw new Error(`No available display number found (checked ${startFrom} to ${startFrom + 99})`);
28
+ }
29
+ function startXvfb(options) {
30
+ const resolution = options?.resolution || '1920x1080x24';
31
+ const displayNum = options?.displayNumber ?? findAvailableDisplay();
32
+ const display = `:${displayNum}`;
33
+ return new Promise((resolve, reject)=>{
34
+ debugXvfb(`Starting Xvfb on display ${display} with resolution ${resolution}`);
35
+ const xvfbProcess = spawn('Xvfb', [
36
+ display,
37
+ '-screen',
38
+ '0',
39
+ resolution,
40
+ '-ac',
41
+ '-nolisten',
42
+ 'tcp'
43
+ ], {
44
+ stdio: 'ignore'
45
+ });
46
+ let settled = false;
47
+ xvfbProcess.on('error', (err)=>{
48
+ if (!settled) {
49
+ settled = true;
50
+ reject(new Error(`Failed to start Xvfb: ${err.message}`));
51
+ }
52
+ });
53
+ xvfbProcess.on('exit', (code)=>{
54
+ if (!settled) {
55
+ settled = true;
56
+ reject(new Error(`Xvfb exited unexpectedly with code ${code}`));
57
+ }
58
+ });
59
+ const instance = {
60
+ process: xvfbProcess,
61
+ display,
62
+ stop () {
63
+ try {
64
+ xvfbProcess.kill('SIGTERM');
65
+ } catch {}
66
+ }
67
+ };
68
+ setTimeout(()=>{
69
+ if (!settled) {
70
+ settled = true;
71
+ debugXvfb(`Xvfb started on display ${display}`);
72
+ resolve(instance);
73
+ }
74
+ }, 500);
75
+ });
76
+ }
77
+ function needsXvfb(explicitOpt) {
78
+ if ('linux' !== process.platform) return false;
79
+ return true === explicitOpt;
80
+ }
13
81
  function _define_property(obj, key, value) {
14
82
  if (key in obj) Object.defineProperty(obj, key, {
15
83
  value: value,
@@ -180,53 +248,74 @@ class ComputerDevice {
180
248
  async connect() {
181
249
  debugDevice('Connecting to computer device');
182
250
  try {
251
+ const headless = this.options?.headless ?? 'true' === process.env.MIDSCENE_COMPUTER_HEADLESS_LINUX;
252
+ if (needsXvfb(headless)) {
253
+ if (!checkXvfbInstalled()) throw new Error('Xvfb is required for headless mode but not installed. Install: sudo apt-get install xvfb');
254
+ this.xvfbInstance = await startXvfb({
255
+ resolution: this.options?.xvfbResolution
256
+ });
257
+ process.env.DISPLAY = this.xvfbInstance.display;
258
+ debugDevice(`Xvfb started on display ${this.xvfbInstance.display}`);
259
+ this.xvfbCleanup = ()=>{
260
+ if (this.xvfbInstance) {
261
+ this.xvfbInstance.stop();
262
+ this.xvfbInstance = void 0;
263
+ }
264
+ };
265
+ process.on('exit', this.xvfbCleanup);
266
+ process.on('SIGINT', this.xvfbCleanup);
267
+ process.on('SIGTERM', this.xvfbCleanup);
268
+ }
183
269
  libnut = await getLibnut();
184
270
  const size = await this.size();
185
271
  const displays = await ComputerDevice.listDisplays();
272
+ const headlessInfo = this.xvfbInstance ? `\nHeadless: true (Xvfb on ${this.xvfbInstance.display})` : '';
186
273
  this.description = `
187
274
  Type: Computer
188
275
  Platform: ${process.platform}
189
276
  Display: ${this.displayId || 'Primary'}
190
277
  Screen Size: ${size.width}x${size.height}
191
- Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ') : 'Unknown'}
278
+ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ') : 'Unknown'}${headlessInfo}
192
279
  `;
193
280
  debugDevice('Computer device connected', this.description);
281
+ await this.healthCheck();
194
282
  } catch (error) {
283
+ if (this.xvfbInstance) {
284
+ this.xvfbInstance.stop();
285
+ this.xvfbInstance = void 0;
286
+ }
195
287
  debugDevice(`Failed to connect: ${error}`);
196
288
  throw new Error(`Unable to connect to computer device: ${error}`);
197
289
  }
198
- await this.healthCheck();
199
290
  }
200
291
  async healthCheck() {
201
292
  console.log('[HealthCheck] Starting health check...');
202
293
  console.log('[HealthCheck] Taking screenshot...');
203
- try {
204
- const base64 = await this.screenshotBase64();
205
- console.log(`[HealthCheck] Screenshot succeeded (length=${base64.length})`);
206
- } catch (error) {
207
- console.error(`[HealthCheck] Screenshot failed: ${error}`);
208
- process.exit(1);
209
- }
294
+ const screenshotTimeout = 15000;
295
+ let timeoutId;
296
+ const timeoutPromise = new Promise((_, reject)=>{
297
+ timeoutId = setTimeout(()=>reject(new Error('Screenshot timed out')), screenshotTimeout);
298
+ });
299
+ const base64 = await Promise.race([
300
+ this.screenshotBase64().finally(()=>clearTimeout(timeoutId)),
301
+ timeoutPromise
302
+ ]);
303
+ console.log(`[HealthCheck] Screenshot succeeded (length=${base64.length})`);
210
304
  console.log('[HealthCheck] Moving mouse...');
211
- try {
212
- node_assert(libnut, 'libnut not initialized');
213
- const startPos = libnut.getMousePos();
214
- console.log(`[HealthCheck] Current mouse position: (${startPos.x}, ${startPos.y})`);
215
- const offsetX = Math.floor(40 * Math.random()) + 10;
216
- const offsetY = Math.floor(40 * Math.random()) + 10;
217
- const targetX = startPos.x + offsetX;
218
- const targetY = startPos.y + offsetY;
219
- console.log(`[HealthCheck] Moving mouse to (${targetX}, ${targetY})...`);
220
- libnut.moveMouse(targetX, targetY);
221
- await sleep(50);
222
- const movedPos = libnut.getMousePos();
223
- console.log(`[HealthCheck] Mouse position after move: (${movedPos.x}, ${movedPos.y})`);
224
- libnut.moveMouse(startPos.x, startPos.y);
225
- console.log(`[HealthCheck] Mouse restored to (${startPos.x}, ${startPos.y})`);
226
- } catch (error) {
227
- console.error(`[HealthCheck] Mouse move failed: ${error}`);
228
- process.exit(1);
229
- }
305
+ node_assert(libnut, 'libnut not initialized');
306
+ const startPos = libnut.getMousePos();
307
+ console.log(`[HealthCheck] Current mouse position: (${startPos.x}, ${startPos.y})`);
308
+ const offsetX = Math.floor(40 * Math.random()) + 10;
309
+ const offsetY = Math.floor(40 * Math.random()) + 10;
310
+ const targetX = startPos.x + offsetX;
311
+ const targetY = startPos.y + offsetY;
312
+ console.log(`[HealthCheck] Moving mouse to (${targetX}, ${targetY})...`);
313
+ libnut.moveMouse(targetX, targetY);
314
+ await sleep(50);
315
+ const movedPos = libnut.getMousePos();
316
+ console.log(`[HealthCheck] Mouse position after move: (${movedPos.x}, ${movedPos.y})`);
317
+ libnut.moveMouse(startPos.x, startPos.y);
318
+ console.log(`[HealthCheck] Mouse restored to (${startPos.x}, ${startPos.y})`);
230
319
  console.log('[HealthCheck] Health check passed');
231
320
  }
232
321
  async screenshotBase64() {
@@ -522,6 +611,16 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
522
611
  }
523
612
  async destroy() {
524
613
  if (this.destroyed) return;
614
+ if (this.xvfbInstance) {
615
+ this.xvfbInstance.stop();
616
+ this.xvfbInstance = void 0;
617
+ }
618
+ if (this.xvfbCleanup) {
619
+ process.removeListener('exit', this.xvfbCleanup);
620
+ process.removeListener('SIGINT', this.xvfbCleanup);
621
+ process.removeListener('SIGTERM', this.xvfbCleanup);
622
+ this.xvfbCleanup = void 0;
623
+ }
525
624
  this.destroyed = true;
526
625
  debugDevice('Computer device destroyed');
527
626
  }
@@ -534,6 +633,8 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
534
633
  _define_property(this, "displayId", void 0);
535
634
  _define_property(this, "description", void 0);
536
635
  _define_property(this, "destroyed", false);
636
+ _define_property(this, "xvfbInstance", void 0);
637
+ _define_property(this, "xvfbCleanup", void 0);
537
638
  _define_property(this, "useAppleScript", void 0);
538
639
  _define_property(this, "uri", void 0);
539
640
  this.options = options;
@@ -562,7 +663,7 @@ class ComputerMidsceneTools extends BaseMidsceneTools {
562
663
  createTemporaryDevice() {
563
664
  return new ComputerDevice({});
564
665
  }
565
- async ensureAgent(displayId) {
666
+ async ensureAgent(displayId, headless) {
566
667
  if (this.agent && displayId) {
567
668
  try {
568
669
  await this.agent.destroy?.();
@@ -573,10 +674,15 @@ class ComputerMidsceneTools extends BaseMidsceneTools {
573
674
  }
574
675
  if (this.agent) return this.agent;
575
676
  debug('Creating Computer agent with displayId:', displayId || 'primary');
576
- const opts = displayId ? {
577
- displayId
578
- } : void 0;
579
- const agent = await agentFromComputer(opts);
677
+ const opts = {
678
+ ...displayId ? {
679
+ displayId
680
+ } : {},
681
+ ...void 0 !== headless ? {
682
+ headless
683
+ } : {}
684
+ };
685
+ const agent = await agentFromComputer(Object.keys(opts).length > 0 ? opts : void 0);
580
686
  this.agent = agent;
581
687
  return agent;
582
688
  }
@@ -586,10 +692,11 @@ class ComputerMidsceneTools extends BaseMidsceneTools {
586
692
  name: 'computer_connect',
587
693
  description: 'Connect to computer desktop. Provide displayId to connect to a specific display (use computer_list_displays to get available IDs). If not provided, uses the primary display.',
588
694
  schema: {
589
- displayId: z.string().optional().describe('Display ID (from computer_list_displays)')
695
+ displayId: z.string().optional().describe('Display ID (from computer_list_displays)'),
696
+ headless: z.boolean().optional().describe('Start virtual display via Xvfb (Linux only)')
590
697
  },
591
- handler: async ({ displayId })=>{
592
- const agent = await this.ensureAgent(displayId);
698
+ handler: async ({ displayId, headless })=>{
699
+ const agent = await this.ensureAgent(displayId, headless);
593
700
  const screenshot = await agent.interface.screenshotBase64();
594
701
  return {
595
702
  content: [
package/dist/es/index.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  import node_assert from "node:assert";
2
- import { execSync } from "node:child_process";
2
+ import { execSync, spawn } from "node:child_process";
3
3
  import { createRequire } from "node:module";
4
4
  import { getMidsceneLocationSchema, z } from "@midscene/core";
5
5
  import { actionHoverParamSchema, defineAction, defineActionClearInput, defineActionDoubleClick, defineActionDragAndDrop, defineActionKeyboardPress, defineActionRightClick, defineActionScroll, defineActionTap } from "@midscene/core/device";
@@ -7,8 +7,76 @@ import { sleep } from "@midscene/core/utils";
7
7
  import { createImgBase64ByFormat } from "@midscene/shared/img";
8
8
  import { getDebug } from "@midscene/shared/logger";
9
9
  import screenshot_desktop from "screenshot-desktop";
10
+ import { existsSync } from "node:fs";
10
11
  import { Agent } from "@midscene/core/agent";
11
12
  import { overrideAIConfig } from "@midscene/shared/env";
13
+ const debugXvfb = getDebug('computer:xvfb');
14
+ function checkXvfbInstalled() {
15
+ try {
16
+ execSync('which Xvfb', {
17
+ stdio: 'ignore'
18
+ });
19
+ return true;
20
+ } catch {
21
+ return false;
22
+ }
23
+ }
24
+ function findAvailableDisplay(startFrom = 99) {
25
+ for(let n = startFrom; n < startFrom + 100; n++)if (!existsSync(`/tmp/.X${n}-lock`)) return n;
26
+ throw new Error(`No available display number found (checked ${startFrom} to ${startFrom + 99})`);
27
+ }
28
+ function startXvfb(options) {
29
+ const resolution = options?.resolution || '1920x1080x24';
30
+ const displayNum = options?.displayNumber ?? findAvailableDisplay();
31
+ const display = `:${displayNum}`;
32
+ return new Promise((resolve, reject)=>{
33
+ debugXvfb(`Starting Xvfb on display ${display} with resolution ${resolution}`);
34
+ const xvfbProcess = spawn('Xvfb', [
35
+ display,
36
+ '-screen',
37
+ '0',
38
+ resolution,
39
+ '-ac',
40
+ '-nolisten',
41
+ 'tcp'
42
+ ], {
43
+ stdio: 'ignore'
44
+ });
45
+ let settled = false;
46
+ xvfbProcess.on('error', (err)=>{
47
+ if (!settled) {
48
+ settled = true;
49
+ reject(new Error(`Failed to start Xvfb: ${err.message}`));
50
+ }
51
+ });
52
+ xvfbProcess.on('exit', (code)=>{
53
+ if (!settled) {
54
+ settled = true;
55
+ reject(new Error(`Xvfb exited unexpectedly with code ${code}`));
56
+ }
57
+ });
58
+ const instance = {
59
+ process: xvfbProcess,
60
+ display,
61
+ stop () {
62
+ try {
63
+ xvfbProcess.kill('SIGTERM');
64
+ } catch {}
65
+ }
66
+ };
67
+ setTimeout(()=>{
68
+ if (!settled) {
69
+ settled = true;
70
+ debugXvfb(`Xvfb started on display ${display}`);
71
+ resolve(instance);
72
+ }
73
+ }, 500);
74
+ });
75
+ }
76
+ function needsXvfb(explicitOpt) {
77
+ if ('linux' !== process.platform) return false;
78
+ return true === explicitOpt;
79
+ }
12
80
  function _define_property(obj, key, value) {
13
81
  if (key in obj) Object.defineProperty(obj, key, {
14
82
  value: value,
@@ -179,53 +247,74 @@ class ComputerDevice {
179
247
  async connect() {
180
248
  debugDevice('Connecting to computer device');
181
249
  try {
250
+ const headless = this.options?.headless ?? 'true' === process.env.MIDSCENE_COMPUTER_HEADLESS_LINUX;
251
+ if (needsXvfb(headless)) {
252
+ if (!checkXvfbInstalled()) throw new Error('Xvfb is required for headless mode but not installed. Install: sudo apt-get install xvfb');
253
+ this.xvfbInstance = await startXvfb({
254
+ resolution: this.options?.xvfbResolution
255
+ });
256
+ process.env.DISPLAY = this.xvfbInstance.display;
257
+ debugDevice(`Xvfb started on display ${this.xvfbInstance.display}`);
258
+ this.xvfbCleanup = ()=>{
259
+ if (this.xvfbInstance) {
260
+ this.xvfbInstance.stop();
261
+ this.xvfbInstance = void 0;
262
+ }
263
+ };
264
+ process.on('exit', this.xvfbCleanup);
265
+ process.on('SIGINT', this.xvfbCleanup);
266
+ process.on('SIGTERM', this.xvfbCleanup);
267
+ }
182
268
  device_libnut = await getLibnut();
183
269
  const size = await this.size();
184
270
  const displays = await ComputerDevice.listDisplays();
271
+ const headlessInfo = this.xvfbInstance ? `\nHeadless: true (Xvfb on ${this.xvfbInstance.display})` : '';
185
272
  this.description = `
186
273
  Type: Computer
187
274
  Platform: ${process.platform}
188
275
  Display: ${this.displayId || 'Primary'}
189
276
  Screen Size: ${size.width}x${size.height}
190
- Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ') : 'Unknown'}
277
+ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ') : 'Unknown'}${headlessInfo}
191
278
  `;
192
279
  debugDevice('Computer device connected', this.description);
280
+ await this.healthCheck();
193
281
  } catch (error) {
282
+ if (this.xvfbInstance) {
283
+ this.xvfbInstance.stop();
284
+ this.xvfbInstance = void 0;
285
+ }
194
286
  debugDevice(`Failed to connect: ${error}`);
195
287
  throw new Error(`Unable to connect to computer device: ${error}`);
196
288
  }
197
- await this.healthCheck();
198
289
  }
199
290
  async healthCheck() {
200
291
  console.log('[HealthCheck] Starting health check...');
201
292
  console.log('[HealthCheck] Taking screenshot...');
202
- try {
203
- const base64 = await this.screenshotBase64();
204
- console.log(`[HealthCheck] Screenshot succeeded (length=${base64.length})`);
205
- } catch (error) {
206
- console.error(`[HealthCheck] Screenshot failed: ${error}`);
207
- process.exit(1);
208
- }
293
+ const screenshotTimeout = 15000;
294
+ let timeoutId;
295
+ const timeoutPromise = new Promise((_, reject)=>{
296
+ timeoutId = setTimeout(()=>reject(new Error('Screenshot timed out')), screenshotTimeout);
297
+ });
298
+ const base64 = await Promise.race([
299
+ this.screenshotBase64().finally(()=>clearTimeout(timeoutId)),
300
+ timeoutPromise
301
+ ]);
302
+ console.log(`[HealthCheck] Screenshot succeeded (length=${base64.length})`);
209
303
  console.log('[HealthCheck] Moving mouse...');
210
- try {
211
- node_assert(device_libnut, 'libnut not initialized');
212
- const startPos = device_libnut.getMousePos();
213
- console.log(`[HealthCheck] Current mouse position: (${startPos.x}, ${startPos.y})`);
214
- const offsetX = Math.floor(40 * Math.random()) + 10;
215
- const offsetY = Math.floor(40 * Math.random()) + 10;
216
- const targetX = startPos.x + offsetX;
217
- const targetY = startPos.y + offsetY;
218
- console.log(`[HealthCheck] Moving mouse to (${targetX}, ${targetY})...`);
219
- device_libnut.moveMouse(targetX, targetY);
220
- await sleep(50);
221
- const movedPos = device_libnut.getMousePos();
222
- console.log(`[HealthCheck] Mouse position after move: (${movedPos.x}, ${movedPos.y})`);
223
- device_libnut.moveMouse(startPos.x, startPos.y);
224
- console.log(`[HealthCheck] Mouse restored to (${startPos.x}, ${startPos.y})`);
225
- } catch (error) {
226
- console.error(`[HealthCheck] Mouse move failed: ${error}`);
227
- process.exit(1);
228
- }
304
+ node_assert(device_libnut, 'libnut not initialized');
305
+ const startPos = device_libnut.getMousePos();
306
+ console.log(`[HealthCheck] Current mouse position: (${startPos.x}, ${startPos.y})`);
307
+ const offsetX = Math.floor(40 * Math.random()) + 10;
308
+ const offsetY = Math.floor(40 * Math.random()) + 10;
309
+ const targetX = startPos.x + offsetX;
310
+ const targetY = startPos.y + offsetY;
311
+ console.log(`[HealthCheck] Moving mouse to (${targetX}, ${targetY})...`);
312
+ device_libnut.moveMouse(targetX, targetY);
313
+ await sleep(50);
314
+ const movedPos = device_libnut.getMousePos();
315
+ console.log(`[HealthCheck] Mouse position after move: (${movedPos.x}, ${movedPos.y})`);
316
+ device_libnut.moveMouse(startPos.x, startPos.y);
317
+ console.log(`[HealthCheck] Mouse restored to (${startPos.x}, ${startPos.y})`);
229
318
  console.log('[HealthCheck] Health check passed');
230
319
  }
231
320
  async screenshotBase64() {
@@ -521,6 +610,16 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
521
610
  }
522
611
  async destroy() {
523
612
  if (this.destroyed) return;
613
+ if (this.xvfbInstance) {
614
+ this.xvfbInstance.stop();
615
+ this.xvfbInstance = void 0;
616
+ }
617
+ if (this.xvfbCleanup) {
618
+ process.removeListener('exit', this.xvfbCleanup);
619
+ process.removeListener('SIGINT', this.xvfbCleanup);
620
+ process.removeListener('SIGTERM', this.xvfbCleanup);
621
+ this.xvfbCleanup = void 0;
622
+ }
524
623
  this.destroyed = true;
525
624
  debugDevice('Computer device destroyed');
526
625
  }
@@ -533,6 +632,8 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
533
632
  _define_property(this, "displayId", void 0);
534
633
  _define_property(this, "description", void 0);
535
634
  _define_property(this, "destroyed", false);
635
+ _define_property(this, "xvfbInstance", void 0);
636
+ _define_property(this, "xvfbCleanup", void 0);
536
637
  _define_property(this, "useAppleScript", void 0);
537
638
  _define_property(this, "uri", void 0);
538
639
  this.options = options;
@@ -621,4 +722,4 @@ async function checkComputerEnvironment() {
621
722
  async function getConnectedDisplays() {
622
723
  return ComputerDevice.listDisplays();
623
724
  }
624
- export { ComputerAgent, ComputerDevice, agentFromComputer, checkAccessibilityPermission, checkComputerEnvironment, getConnectedDisplays, overrideAIConfig };
725
+ export { ComputerAgent, ComputerDevice, agentFromComputer, checkAccessibilityPermission, checkComputerEnvironment, checkXvfbInstalled, getConnectedDisplays, needsXvfb, overrideAIConfig };