@midscene/ios 0.30.6-beta-20251022112352.0 → 0.30.6-beta-20251023082056.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/es/bin.mjs CHANGED
@@ -11520,8 +11520,8 @@ class PlaygroundServer {
11520
11520
  }));
11521
11521
  this._app.use((req, _res, next)=>{
11522
11522
  const { context } = req.body || {};
11523
- if (context && 'updateContext' in this.page && 'function' == typeof this.page.updateContext) {
11524
- this.page.updateContext(context);
11523
+ if (context && 'updateContext' in this.agent.interface && 'function' == typeof this.agent.interface.updateContext) {
11524
+ this.agent.interface.updateContext(context);
11525
11525
  console.log('Context updated by PlaygroundServer middleware');
11526
11526
  }
11527
11527
  next();
@@ -11540,6 +11540,22 @@ class PlaygroundServer {
11540
11540
  writeFileSync(tmpFile, context);
11541
11541
  return tmpFile;
11542
11542
  }
11543
+ async recreateAgent() {
11544
+ if (!this.agentFactory) return void console.warn('Cannot recreate agent: factory function not provided. Agent recreation is only available when using factory mode.');
11545
+ console.log('Recreating agent to cancel current task...');
11546
+ try {
11547
+ if (this.agent && 'function' == typeof this.agent.destroy) await this.agent.destroy();
11548
+ } catch (error) {
11549
+ console.warn('Failed to destroy old agent:', error);
11550
+ }
11551
+ try {
11552
+ this.agent = await this.agentFactory();
11553
+ console.log('Agent recreated successfully');
11554
+ } catch (error) {
11555
+ console.error('Failed to recreate agent:', error);
11556
+ throw error;
11557
+ }
11558
+ }
11543
11559
  setupRoutes() {
11544
11560
  this._app.get('/status', async (req, res)=>{
11545
11561
  res.send({
@@ -11567,7 +11583,7 @@ class PlaygroundServer {
11567
11583
  this._app.post('/action-space', async (req, res)=>{
11568
11584
  try {
11569
11585
  let actionSpace = [];
11570
- actionSpace = await this.page.actionSpace();
11586
+ actionSpace = await this.agent.interface.actionSpace();
11571
11587
  const processedActionSpace = actionSpace.map((action)=>{
11572
11588
  if (action && 'object' == typeof action && 'paramSchema' in action) {
11573
11589
  const typedAction = action;
@@ -11616,7 +11632,12 @@ class PlaygroundServer {
11616
11632
  if (!type) return res.status(400).json({
11617
11633
  error: 'type is required'
11618
11634
  });
11635
+ if (this.currentTaskId) return res.status(409).json({
11636
+ error: 'Another task is already running',
11637
+ currentTaskId: this.currentTaskId
11638
+ });
11619
11639
  if (requestId) {
11640
+ this.currentTaskId = requestId;
11620
11641
  this.taskProgressTips[requestId] = '';
11621
11642
  this.agent.onTaskStartTip = (tip)=>{
11622
11643
  this.taskProgressTips[requestId] = tip;
@@ -11631,7 +11652,7 @@ class PlaygroundServer {
11631
11652
  };
11632
11653
  const startTime = Date.now();
11633
11654
  try {
11634
- const actionSpace = await this.page.actionSpace();
11655
+ const actionSpace = await this.agent.interface.actionSpace();
11635
11656
  const value = {
11636
11657
  type,
11637
11658
  prompt,
@@ -11658,7 +11679,10 @@ class PlaygroundServer {
11658
11679
  const timeCost = Date.now() - startTime;
11659
11680
  if (response.error) console.error(`handle request failed after ${timeCost}ms: requestId: ${requestId}, ${response.error}`);
11660
11681
  else console.log(`handle request done after ${timeCost}ms: requestId: ${requestId}`);
11661
- if (requestId) delete this.taskProgressTips[requestId];
11682
+ if (requestId) {
11683
+ delete this.taskProgressTips[requestId];
11684
+ if (this.currentTaskId === requestId) this.currentTaskId = null;
11685
+ }
11662
11686
  });
11663
11687
  this._app.post('/cancel/:requestId', async (req, res)=>{
11664
11688
  const { requestId } = req.params;
@@ -11666,9 +11690,17 @@ class PlaygroundServer {
11666
11690
  error: 'requestId is required'
11667
11691
  });
11668
11692
  try {
11669
- if (this.taskProgressTips[requestId]) delete this.taskProgressTips[requestId];
11693
+ if (this.currentTaskId !== requestId) return res.json({
11694
+ status: 'not_found',
11695
+ message: 'Task not found or already completed'
11696
+ });
11697
+ console.log(`Cancelling task: ${requestId}`);
11698
+ await this.recreateAgent();
11699
+ delete this.taskProgressTips[requestId];
11700
+ this.currentTaskId = null;
11670
11701
  res.json({
11671
- status: 'cancelled'
11702
+ status: 'cancelled',
11703
+ message: 'Task cancelled successfully by recreating agent'
11672
11704
  });
11673
11705
  } catch (error) {
11674
11706
  const errorMessage = error instanceof Error ? error.message : 'Unknown error';
@@ -11680,10 +11712,10 @@ class PlaygroundServer {
11680
11712
  });
11681
11713
  this._app.get('/screenshot', async (_req, res)=>{
11682
11714
  try {
11683
- if ('function' != typeof this.page.screenshotBase64) return res.status(500).json({
11715
+ if ('function' != typeof this.agent.interface.screenshotBase64) return res.status(500).json({
11684
11716
  error: 'Screenshot method not available on current interface'
11685
11717
  });
11686
- const base64Screenshot = await this.page.screenshotBase64();
11718
+ const base64Screenshot = await this.agent.interface.screenshotBase64();
11687
11719
  res.json({
11688
11720
  screenshot: base64Screenshot,
11689
11721
  timestamp: Date.now()
@@ -11698,9 +11730,9 @@ class PlaygroundServer {
11698
11730
  });
11699
11731
  this._app.get('/interface-info', async (_req, res)=>{
11700
11732
  try {
11701
- var _this_page_describe, _this_page;
11702
- const type = this.page.interfaceType || 'Unknown';
11703
- const description = (null == (_this_page_describe = (_this_page = this.page).describe) ? void 0 : _this_page_describe.call(_this_page)) || void 0;
11733
+ var _this_agent_interface_describe, _this_agent_interface;
11734
+ const type = this.agent.interface.interfaceType || 'Unknown';
11735
+ const description = (null == (_this_agent_interface_describe = (_this_agent_interface = this.agent.interface).describe) ? void 0 : _this_agent_interface_describe.call(_this_agent_interface)) || void 0;
11704
11736
  res.json({
11705
11737
  type,
11706
11738
  description
@@ -11768,6 +11800,11 @@ class PlaygroundServer {
11768
11800
  }
11769
11801
  }
11770
11802
  async launch(port) {
11803
+ if (this.agentFactory) {
11804
+ console.log('Initializing agent from factory function...');
11805
+ this.agent = await this.agentFactory();
11806
+ console.log('Agent initialized successfully');
11807
+ }
11771
11808
  this.initializeApp();
11772
11809
  this.port = port || defaultPort;
11773
11810
  return new Promise((resolve)=>{
@@ -11796,24 +11833,30 @@ class PlaygroundServer {
11796
11833
  } else resolve();
11797
11834
  });
11798
11835
  }
11799
- constructor(page, agent, staticPath = STATIC_PATH, id){
11836
+ constructor(agent, staticPath = STATIC_PATH, id){
11800
11837
  _define_property(this, "_app", void 0);
11801
11838
  _define_property(this, "tmpDir", void 0);
11802
11839
  _define_property(this, "server", void 0);
11803
11840
  _define_property(this, "port", void 0);
11804
- _define_property(this, "page", void 0);
11805
11841
  _define_property(this, "agent", void 0);
11806
11842
  _define_property(this, "staticPath", void 0);
11807
11843
  _define_property(this, "taskProgressTips", void 0);
11808
11844
  _define_property(this, "id", void 0);
11809
11845
  _define_property(this, "_initialized", false);
11846
+ _define_property(this, "agentFactory", void 0);
11847
+ _define_property(this, "currentTaskId", null);
11810
11848
  this._app = express_default()();
11811
11849
  this.tmpDir = getTmpDir();
11812
- this.page = page;
11813
- this.agent = agent;
11814
11850
  this.staticPath = staticPath;
11815
11851
  this.taskProgressTips = {};
11816
11852
  this.id = id || utils_uuid();
11853
+ if ('function' == typeof agent) {
11854
+ this.agentFactory = agent;
11855
+ this.agent = null;
11856
+ } else {
11857
+ this.agent = agent;
11858
+ this.agentFactory = null;
11859
+ }
11817
11860
  }
11818
11861
  }
11819
11862
  __webpack_require__("../../node_modules/.pnpm/cors@2.8.5/node_modules/cors/lib/index.js");
@@ -12002,19 +12045,45 @@ class IOSWebDriverClient extends WebDriverClient {
12002
12045
  }
12003
12046
  async swipe(fromX, fromY, toX, toY, duration = 500) {
12004
12047
  this.ensureSession();
12005
- try {
12006
- await this.makeRequest('POST', `/session/${this.sessionId}/wda/dragfromtoforduration`, {
12007
- fromX,
12008
- fromY,
12009
- toX,
12010
- toY,
12011
- duration: duration / 1000
12012
- });
12013
- debugIOS(`Swiped from (${fromX}, ${fromY}) to (${toX}, ${toY}) in ${duration}ms`);
12014
- } catch (error) {
12015
- debugIOS(`Failed to swipe from (${fromX}, ${fromY}) to (${toX}, ${toY}): ${error}`);
12016
- throw new Error(`Failed to swipe: ${error}`);
12017
- }
12048
+ const actions = {
12049
+ actions: [
12050
+ {
12051
+ type: 'pointer',
12052
+ id: 'finger1',
12053
+ parameters: {
12054
+ pointerType: 'touch'
12055
+ },
12056
+ actions: [
12057
+ {
12058
+ type: 'pointerMove',
12059
+ duration: 0,
12060
+ x: fromX,
12061
+ y: fromY
12062
+ },
12063
+ {
12064
+ type: 'pointerDown',
12065
+ button: 0
12066
+ },
12067
+ {
12068
+ type: 'pause',
12069
+ duration: 100
12070
+ },
12071
+ {
12072
+ type: 'pointerMove',
12073
+ duration,
12074
+ x: toX,
12075
+ y: toY
12076
+ },
12077
+ {
12078
+ type: 'pointerUp',
12079
+ button: 0
12080
+ }
12081
+ ]
12082
+ }
12083
+ ]
12084
+ };
12085
+ await this.makeRequest('POST', `/session/${this.sessionId}/actions`, actions);
12086
+ debugIOS(`Swiped using W3C Actions from (${fromX}, ${fromY}) to (${toX}, ${toY}) in ${duration}ms`);
12018
12087
  }
12019
12088
  async longPress(x, y, duration = 1000) {
12020
12089
  this.ensureSession();
@@ -12160,8 +12229,7 @@ class device_IOSDevice {
12160
12229
  await this.swipe(from.center[0], from.center[1], to.center[0], to.center[1]);
12161
12230
  }),
12162
12231
  defineActionKeyboardPress(async (param)=>{
12163
- const key = param.keyName;
12164
- await this.pressKey(key);
12232
+ await this.pressKey(param.keyName);
12165
12233
  }),
12166
12234
  defineAction({
12167
12235
  name: 'IOSHomeButton',
@@ -12317,23 +12385,23 @@ ScreenSize: ${size.width}x${size.height} (DPR: ${size.scale})
12317
12385
  return '';
12318
12386
  }
12319
12387
  async tap(x, y) {
12320
- await this.wdaBackend.tap(x, y);
12388
+ await this.wdaBackend.tap(Math.round(x), Math.round(y));
12321
12389
  }
12322
12390
  async mouseClick(x, y) {
12323
12391
  debugDevice(`mouseClick at coordinates (${x}, ${y})`);
12324
12392
  await this.tap(x, y);
12325
12393
  }
12326
12394
  async doubleTap(x, y) {
12327
- await this.wdaBackend.doubleTap(x, y);
12395
+ await this.wdaBackend.doubleTap(Math.round(x), Math.round(y));
12328
12396
  }
12329
12397
  async tripleTap(x, y) {
12330
- await this.wdaBackend.tripleTap(x, y);
12398
+ await this.wdaBackend.tripleTap(Math.round(x), Math.round(y));
12331
12399
  }
12332
12400
  async longPress(x, y, duration = 1000) {
12333
- await this.wdaBackend.longPress(x, y, duration);
12401
+ await this.wdaBackend.longPress(Math.round(x), Math.round(y), duration);
12334
12402
  }
12335
12403
  async swipe(fromX, fromY, toX, toY, duration = 500) {
12336
- await this.wdaBackend.swipe(fromX, fromY, toX, toY, duration);
12404
+ await this.wdaBackend.swipe(Math.round(fromX), Math.round(fromY), Math.round(toX), Math.round(toY), duration);
12337
12405
  }
12338
12406
  async typeText(text, options) {
12339
12407
  var _this_options;
@@ -12356,49 +12424,49 @@ ScreenSize: ${size.width}x${size.height} (DPR: ${size.scale})
12356
12424
  async scrollUp(distance, startPoint) {
12357
12425
  const { width, height } = await this.size();
12358
12426
  const start = startPoint ? {
12359
- x: startPoint.left,
12360
- y: startPoint.top
12427
+ x: Math.round(startPoint.left),
12428
+ y: Math.round(startPoint.top)
12361
12429
  } : {
12362
- x: width / 2,
12363
- y: height / 2
12430
+ x: Math.round(width / 2),
12431
+ y: Math.round(height / 2)
12364
12432
  };
12365
- const scrollDistance = distance || height / 3;
12433
+ const scrollDistance = Math.round(distance || height / 3);
12366
12434
  await this.swipe(start.x, start.y, start.x, start.y + scrollDistance);
12367
12435
  }
12368
12436
  async scrollDown(distance, startPoint) {
12369
12437
  const { width, height } = await this.size();
12370
12438
  const start = startPoint ? {
12371
- x: startPoint.left,
12372
- y: startPoint.top
12439
+ x: Math.round(startPoint.left),
12440
+ y: Math.round(startPoint.top)
12373
12441
  } : {
12374
- x: width / 2,
12375
- y: height / 2
12442
+ x: Math.round(width / 2),
12443
+ y: Math.round(height / 2)
12376
12444
  };
12377
- const scrollDistance = distance || height / 3;
12445
+ const scrollDistance = Math.round(distance || height / 3);
12378
12446
  await this.swipe(start.x, start.y, start.x, start.y - scrollDistance);
12379
12447
  }
12380
12448
  async scrollLeft(distance, startPoint) {
12381
12449
  const { width, height } = await this.size();
12382
12450
  const start = startPoint ? {
12383
- x: startPoint.left,
12384
- y: startPoint.top
12451
+ x: Math.round(startPoint.left),
12452
+ y: Math.round(startPoint.top)
12385
12453
  } : {
12386
- x: width / 2,
12387
- y: height / 2
12454
+ x: Math.round(width / 2),
12455
+ y: Math.round(height / 2)
12388
12456
  };
12389
- const scrollDistance = distance || width / 3;
12457
+ const scrollDistance = Math.round(distance || 0.7 * width);
12390
12458
  await this.swipe(start.x, start.y, start.x + scrollDistance, start.y);
12391
12459
  }
12392
12460
  async scrollRight(distance, startPoint) {
12393
12461
  const { width, height } = await this.size();
12394
12462
  const start = startPoint ? {
12395
- x: startPoint.left,
12396
- y: startPoint.top
12463
+ x: Math.round(startPoint.left),
12464
+ y: Math.round(startPoint.top)
12397
12465
  } : {
12398
- x: width / 2,
12399
- y: height / 2
12466
+ x: Math.round(width / 2),
12467
+ y: Math.round(height / 2)
12400
12468
  };
12401
- const scrollDistance = distance || width / 3;
12469
+ const scrollDistance = Math.round(distance || 0.7 * width);
12402
12470
  await this.swipe(start.x, start.y, start.x - scrollDistance, start.y);
12403
12471
  }
12404
12472
  async scrollUntilTop(startPoint) {
@@ -12439,32 +12507,32 @@ ScreenSize: ${size.width}x${size.height} (DPR: ${size.scale})
12439
12507
  const { width, height } = await this.size();
12440
12508
  let start;
12441
12509
  if (startPoint) start = {
12442
- x: startPoint.left,
12443
- y: startPoint.top
12510
+ x: Math.round(startPoint.left),
12511
+ y: Math.round(startPoint.top)
12444
12512
  };
12445
12513
  else switch(direction){
12446
12514
  case 'up':
12447
12515
  start = {
12448
- x: width / 2,
12449
- y: 0.2 * height
12516
+ x: Math.round(width / 2),
12517
+ y: Math.round(0.2 * height)
12450
12518
  };
12451
12519
  break;
12452
12520
  case 'down':
12453
12521
  start = {
12454
- x: width / 2,
12455
- y: 0.8 * height
12522
+ x: Math.round(width / 2),
12523
+ y: Math.round(0.8 * height)
12456
12524
  };
12457
12525
  break;
12458
12526
  case 'left':
12459
12527
  start = {
12460
- x: 0.8 * width,
12461
- y: height / 2
12528
+ x: Math.round(0.8 * width),
12529
+ y: Math.round(height / 2)
12462
12530
  };
12463
12531
  break;
12464
12532
  case 'right':
12465
12533
  start = {
12466
- x: 0.2 * width,
12467
- y: height / 2
12534
+ x: Math.round(0.2 * width),
12535
+ y: Math.round(height / 2)
12468
12536
  };
12469
12537
  break;
12470
12538
  }
@@ -12491,7 +12559,7 @@ ScreenSize: ${size.width}x${size.height} (DPR: ${size.scale})
12491
12559
  break;
12492
12560
  }
12493
12561
  lastScreenshot = currentScreenshot;
12494
- const scrollDistance = 'left' === direction || 'right' === direction ? 0.6 * width : 0.6 * height;
12562
+ const scrollDistance = Math.round('left' === direction || 'right' === direction ? 0.6 * width : 0.6 * height);
12495
12563
  debugDevice(`Performing scroll: ${direction}, distance: ${scrollDistance}`);
12496
12564
  switch(direction){
12497
12565
  case 'up':
@@ -12528,9 +12596,9 @@ ScreenSize: ${size.width}x${size.height} (DPR: ${size.scale})
12528
12596
  try {
12529
12597
  debugDevice('Triggering app switcher with slow swipe up gesture');
12530
12598
  const { width, height } = await this.size();
12531
- const centerX = width / 2;
12532
- const startY = height - 5;
12533
- const endY = 0.5 * height;
12599
+ const centerX = Math.round(width / 2);
12600
+ const startY = Math.round(height - 5);
12601
+ const endY = Math.round(0.5 * height);
12534
12602
  await this.wdaBackend.swipe(centerX, startY, centerX, endY, 1500);
12535
12603
  await sleep(800);
12536
12604
  } catch (error) {
@@ -12548,9 +12616,9 @@ ScreenSize: ${size.width}x${size.height} (DPR: ${size.scale})
12548
12616
  return true;
12549
12617
  }
12550
12618
  const windowSize = await this.wdaBackend.getWindowSize();
12551
- const centerX = windowSize.width / 2;
12552
- const startY = 0.33 * windowSize.height;
12553
- const endY = 0.33 * windowSize.height + 10;
12619
+ const centerX = Math.round(windowSize.width / 2);
12620
+ const startY = Math.round(0.33 * windowSize.height);
12621
+ const endY = Math.round(0.33 * windowSize.height + 10);
12554
12622
  await this.swipe(centerX, startY, centerX, endY, 50);
12555
12623
  debugDevice('Dismissed keyboard with swipe down gesture at screen one-third position');
12556
12624
  await sleep(300);
@@ -12716,17 +12784,14 @@ const main = async ()=>{
12716
12784
  host: 'localhost',
12717
12785
  port: DEFAULT_WDA_PORT
12718
12786
  };
12719
- let device;
12720
- let agent;
12721
12787
  let connected = false;
12722
12788
  while(!connected)try {
12723
- device = new device_IOSDevice({
12789
+ const device = new device_IOSDevice({
12724
12790
  wdaHost: wdaConfig.host,
12725
12791
  wdaPort: wdaConfig.port
12726
12792
  });
12727
12793
  console.log(`\u{1F50C} Connecting to WebDriverAgent at ${wdaConfig.host}:${wdaConfig.port}...`);
12728
12794
  await device.connect();
12729
- agent = new IOSAgent(device);
12730
12795
  connected = true;
12731
12796
  const deviceInfo = await device.getConnectedDeviceInfo();
12732
12797
  console.log("\u2705 Connected to WebDriverAgent successfully!");
@@ -12778,7 +12843,15 @@ const main = async ()=>{
12778
12843
  `);
12779
12844
  else if ('configure' === action) wdaConfig = await configureWebDriverAgent();
12780
12845
  }
12781
- const playgroundServer = new PlaygroundServer(device, agent, staticDir);
12846
+ const agentFactory = async ()=>{
12847
+ const newDevice = new device_IOSDevice({
12848
+ wdaHost: wdaConfig.host,
12849
+ wdaPort: wdaConfig.port
12850
+ });
12851
+ await newDevice.connect();
12852
+ return new IOSAgent(newDevice);
12853
+ };
12854
+ const playgroundServer = new PlaygroundServer(agentFactory, staticDir);
12782
12855
  console.log("\uD83D\uDE80 Starting server...");
12783
12856
  const availablePlaygroundPort = await findAvailablePort(constants_PLAYGROUND_SERVER_PORT);
12784
12857
  if (availablePlaygroundPort !== constants_PLAYGROUND_SERVER_PORT) console.log(`\u{26A0}\u{FE0F} Port ${constants_PLAYGROUND_SERVER_PORT} is busy, using port ${availablePlaygroundPort} instead`);