@midscene/web 0.11.1 → 0.11.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38):
  1. package/dist/es/appium.js +85 -39
  2. package/dist/es/bridge-mode-browser.js +58 -19
  3. package/dist/es/bridge-mode.js +62 -43
  4. package/dist/es/chrome-extension.js +105 -51
  5. package/dist/es/index.js +292 -218
  6. package/dist/es/midscene-playground.js +57 -38
  7. package/dist/es/playground.js +57 -38
  8. package/dist/es/playwright-report.js +4 -0
  9. package/dist/es/playwright.js +252 -203
  10. package/dist/es/puppeteer.js +86 -60
  11. package/dist/es/ui-utils.js +43 -0
  12. package/dist/lib/appium.js +85 -39
  13. package/dist/lib/bridge-mode-browser.js +58 -19
  14. package/dist/lib/bridge-mode.js +62 -43
  15. package/dist/lib/chrome-extension.js +105 -51
  16. package/dist/lib/index.js +292 -218
  17. package/dist/lib/midscene-playground.js +57 -38
  18. package/dist/lib/playground.js +57 -38
  19. package/dist/lib/playwright-report.js +4 -0
  20. package/dist/lib/playwright.js +252 -203
  21. package/dist/lib/puppeteer.js +86 -60
  22. package/dist/lib/ui-utils.js +43 -0
  23. package/dist/types/{tasks-d5a01262.d.ts → agent-ae110e80.d.ts} +43 -43
  24. package/dist/types/appium.d.ts +3 -3
  25. package/dist/types/bridge-mode-browser.d.ts +3 -3
  26. package/dist/types/bridge-mode.d.ts +4 -4
  27. package/dist/types/{browser-7d5614fb.d.ts → browser-9d620553.d.ts} +4 -4
  28. package/dist/types/chrome-extension.d.ts +4 -4
  29. package/dist/types/index.d.ts +8 -8
  30. package/dist/types/{page-77af8d5f.d.ts → page-97720803.d.ts} +34 -8
  31. package/dist/types/playground.d.ts +4 -4
  32. package/dist/types/playwright.d.ts +13 -7
  33. package/dist/types/puppeteer.d.ts +3 -3
  34. package/dist/types/ui-utils.d.ts +6 -1
  35. package/dist/types/{utils-1a3bc661.d.ts → utils-93b3f5f3.d.ts} +1 -1
  36. package/dist/types/utils.d.ts +2 -2
  37. package/dist/types/yaml.d.ts +4 -4
  38. package/package.json +6 -6
@@ -3497,7 +3497,9 @@ var TaskCache = class {
3497
3497
  if (!this.midscenePkgInfo) {
3498
3498
  return void 0;
3499
3499
  }
3500
- if (jsonData.pkgName !== this.midscenePkgInfo.name || jsonData.pkgVersion !== this.midscenePkgInfo.version) {
3500
+ const jsonDataPkgVersion = jsonData.pkgVersion.split(".");
3501
+ const midscenePkgInfoPkgVersion = this.midscenePkgInfo.version.split(".");
3502
+ if (jsonDataPkgVersion[0] !== midscenePkgInfoPkgVersion[0] || jsonDataPkgVersion[1] !== midscenePkgInfoPkgVersion[1]) {
3501
3503
  return void 0;
3502
3504
  }
3503
3505
  return jsonData;
@@ -3531,6 +3533,79 @@ var TaskCache = class {
3531
3533
  }
3532
3534
  };
3533
3535
 
3536
+ // src/common/ui-utils.ts
3537
+ function typeStr(task) {
3538
+ return task.subType ? `${task.type} / ${task.subType || ""}` : task.type;
3539
+ }
3540
+ function getKeyCommands(value) {
3541
+ const keys = Array.isArray(value) ? value : [value];
3542
+ return keys.reduce((acc, k) => {
3543
+ const includeMeta = keys.includes("Meta") || keys.includes("Control");
3544
+ if (includeMeta && (k === "a" || k === "A")) {
3545
+ return acc.concat([{ key: k, command: "SelectAll" }]);
3546
+ }
3547
+ if (includeMeta && (k === "c" || k === "C")) {
3548
+ return acc.concat([{ key: k, command: "Copy" }]);
3549
+ }
3550
+ if (includeMeta && (k === "v" || k === "V")) {
3551
+ return acc.concat([{ key: k, command: "Paste" }]);
3552
+ }
3553
+ return acc.concat([{ key: k }]);
3554
+ }, []);
3555
+ }
3556
+ function paramStr(task) {
3557
+ var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k;
3558
+ let value;
3559
+ if (task.type === "Planning") {
3560
+ value = (_a = task == null ? void 0 : task.param) == null ? void 0 : _a.userPrompt;
3561
+ }
3562
+ if (task.type === "Insight") {
3563
+ value = ((_b = task == null ? void 0 : task.param) == null ? void 0 : _b.prompt) || ((_c = task == null ? void 0 : task.param) == null ? void 0 : _c.id) || ((_d = task == null ? void 0 : task.param) == null ? void 0 : _d.dataDemand) || ((_e = task == null ? void 0 : task.param) == null ? void 0 : _e.assertion);
3564
+ }
3565
+ if (task.type === "Action") {
3566
+ const sleepMs = (_f = task == null ? void 0 : task.param) == null ? void 0 : _f.timeMs;
3567
+ const scrollType = (_g = task == null ? void 0 : task.param) == null ? void 0 : _g.scrollType;
3568
+ if (sleepMs) {
3569
+ value = `${sleepMs}ms`;
3570
+ } else if (scrollType) {
3571
+ const scrollDirection = (_h = task == null ? void 0 : task.param) == null ? void 0 : _h.direction;
3572
+ const scrollDistance = (_i = task == null ? void 0 : task.param) == null ? void 0 : _i.distance;
3573
+ value = `${scrollDirection || "down"}, ${scrollType || "once"}, ${scrollDistance || "distance-not-set"}`;
3574
+ } else {
3575
+ value = ((_j = task == null ? void 0 : task.param) == null ? void 0 : _j.value) || ((_k = task == null ? void 0 : task.param) == null ? void 0 : _k.scrollType);
3576
+ }
3577
+ if (!value) {
3578
+ value = task.thought;
3579
+ }
3580
+ }
3581
+ if (typeof value === "undefined")
3582
+ return "";
3583
+ return typeof value === "string" ? value : JSON.stringify(value, void 0, 2);
3584
+ }
3585
+ var limitOpenNewTabScript = `
3586
+ if (!window.__MIDSCENE_NEW_TAB_INTERCEPTOR_INITIALIZED__) {
3587
+ window.__MIDSCENE_NEW_TAB_INTERCEPTOR_INITIALIZED__ = true;
3588
+
3589
+ // Intercept the window.open method (only once)
3590
+ window.open = function(url) {
3591
+ console.log('Blocked window.open:', url);
3592
+ window.location.href = url;
3593
+ return null;
3594
+ };
3595
+
3596
+ // Block all a tag clicks with target="_blank" (only once)
3597
+ document.addEventListener('click', function(e) {
3598
+ const target = e.target.closest('a');
3599
+ if (target && target.target === '_blank') {
3600
+ e.preventDefault();
3601
+ console.log('Blocked new tab:', target.href);
3602
+ window.location.href = target.href;
3603
+ target.removeAttribute('target');
3604
+ }
3605
+ }, true);
3606
+ }
3607
+ `;
3608
+
3534
3609
  // src/common/tasks.ts
3535
3610
  var PageTaskExecutor = class {
3536
3611
  constructor(page, insight, opts) {
@@ -3665,6 +3740,7 @@ var PageTaskExecutor = class {
3665
3740
  output: {
3666
3741
  element
3667
3742
  },
3743
+ pageContext,
3668
3744
  log: {
3669
3745
  dump: insightDump
3670
3746
  },
@@ -3744,8 +3820,8 @@ var PageTaskExecutor = class {
3744
3820
  thought: plan2.thought,
3745
3821
  locate: plan2.locate,
3746
3822
  executor: async (taskParam) => {
3747
- (0, import_node_assert4.default)(taskParam == null ? void 0 : taskParam.value, "No key to press");
3748
- await this.page.keyboard.press(taskParam.value);
3823
+ const keys = getKeyCommands(taskParam.value);
3824
+ await this.page.keyboard.press(keys);
3749
3825
  }
3750
3826
  };
3751
3827
  tasks.push(taskActionKeyboardPress);
@@ -4153,7 +4229,7 @@ var PageTaskExecutor = class {
4153
4229
  const cacheGroup = this.taskCache.getCacheGroupByPrompt(userPrompt);
4154
4230
  const isCompleted = false;
4155
4231
  let currentActionNumber = 0;
4156
- const maxActionNumber = 20;
4232
+ const maxActionNumber = 40;
4157
4233
  while (!isCompleted && currentActionNumber < maxActionNumber) {
4158
4234
  currentActionNumber++;
4159
4235
  const planningTask = this.planningTaskToGoal(
@@ -4349,40 +4425,6 @@ var PageTaskExecutor = class {
4349
4425
  }
4350
4426
  };
4351
4427
 
4352
- // src/common/ui-utils.ts
4353
- function typeStr(task) {
4354
- return task.subType ? `${task.type} / ${task.subType || ""}` : task.type;
4355
- }
4356
- function paramStr(task) {
4357
- var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k;
4358
- let value;
4359
- if (task.type === "Planning") {
4360
- value = (_a = task == null ? void 0 : task.param) == null ? void 0 : _a.userPrompt;
4361
- }
4362
- if (task.type === "Insight") {
4363
- value = ((_b = task == null ? void 0 : task.param) == null ? void 0 : _b.prompt) || ((_c = task == null ? void 0 : task.param) == null ? void 0 : _c.id) || ((_d = task == null ? void 0 : task.param) == null ? void 0 : _d.dataDemand) || ((_e = task == null ? void 0 : task.param) == null ? void 0 : _e.assertion);
4364
- }
4365
- if (task.type === "Action") {
4366
- const sleepMs = (_f = task == null ? void 0 : task.param) == null ? void 0 : _f.timeMs;
4367
- const scrollType = (_g = task == null ? void 0 : task.param) == null ? void 0 : _g.scrollType;
4368
- if (sleepMs) {
4369
- value = `${sleepMs}ms`;
4370
- } else if (scrollType) {
4371
- const scrollDirection = (_h = task == null ? void 0 : task.param) == null ? void 0 : _h.direction;
4372
- const scrollDistance = (_i = task == null ? void 0 : task.param) == null ? void 0 : _i.distance;
4373
- value = `${scrollDirection || "down"}, ${scrollType || "once"}, ${scrollDistance || "distance-not-set"}`;
4374
- } else {
4375
- value = ((_j = task == null ? void 0 : task.param) == null ? void 0 : _j.value) || ((_k = task == null ? void 0 : task.param) == null ? void 0 : _k.scrollType);
4376
- }
4377
- if (!value) {
4378
- value = task.thought;
4379
- }
4380
- }
4381
- if (typeof value === "undefined")
4382
- return "";
4383
- return typeof value === "string" ? value : JSON.stringify(value, void 0, 2);
4384
- }
4385
-
4386
4428
  // src/common/agent.ts
4387
4429
  var PageAgent = class {
4388
4430
  constructor(page, opts) {
@@ -4786,13 +4828,13 @@ var injectStopWaterFlowAnimation = async () => {
4786
4828
  };
4787
4829
 
4788
4830
  // src/chrome-extension/page.ts
4789
- function sleep2(ms) {
4831
+ function sleep3(ms) {
4790
4832
  return new Promise((resolve) => setTimeout(resolve, ms));
4791
4833
  }
4792
4834
  var ChromeExtensionProxyPage = class {
4793
- constructor(trackingActiveTab) {
4835
+ constructor(forceSameTabNavigation) {
4794
4836
  this.pageType = "chrome-extension-proxy";
4795
- this.version = "0.11.1";
4837
+ this.version = "0.11.3";
4796
4838
  this.activeTabId = null;
4797
4839
  this.tabIdOfDebuggerAttached = null;
4798
4840
  this.attachingDebugger = null;
@@ -4861,23 +4903,24 @@ var ChromeExtensionProxyPage = class {
4861
4903
  });
4862
4904
  await cdpKeyboard.type(text, { delay: 0 });
4863
4905
  },
4864
- press: async (key) => {
4906
+ press: async (action) => {
4865
4907
  const cdpKeyboard = new CdpKeyboard({
4866
4908
  send: this.sendCommandToDebugger.bind(this)
4867
4909
  });
4868
- const keys = Array.isArray(key) ? key : [key];
4910
+ const keys = Array.isArray(action) ? action : [action];
4869
4911
  for (const k of keys) {
4870
- await cdpKeyboard.down(k);
4912
+ const commands = k.command ? [k.command] : [];
4913
+ await cdpKeyboard.down(k.key, { commands });
4871
4914
  }
4872
4915
  for (const k of [...keys].reverse()) {
4873
- await cdpKeyboard.up(k);
4916
+ await cdpKeyboard.up(k.key);
4874
4917
  }
4875
4918
  }
4876
4919
  };
4877
- this.trackingActiveTab = trackingActiveTab;
4920
+ this.forceSameTabNavigation = forceSameTabNavigation;
4878
4921
  }
4879
4922
  async getTabId() {
4880
- if (this.activeTabId && !this.trackingActiveTab) {
4923
+ if (this.activeTabId && !this.forceSameTabNavigation) {
4881
4924
  return this.activeTabId;
4882
4925
  }
4883
4926
  const tabId = await chrome.tabs.query({ active: true, currentWindow: true }).then((tabs) => {
@@ -4919,7 +4962,7 @@ var ChromeExtensionProxyPage = class {
4919
4962
  }
4920
4963
  }
4921
4964
  await chrome.debugger.attach({ tabId: currentTabId }, "1.3");
4922
- await sleep2(500);
4965
+ await sleep3(500);
4923
4966
  this.tabIdOfDebuggerAttached = currentTabId;
4924
4967
  await this.enableWaterFlowAnimation();
4925
4968
  } catch (error) {
@@ -4959,11 +5002,20 @@ var ChromeExtensionProxyPage = class {
4959
5002
  return;
4960
5003
  }
4961
5004
  await this.disableWaterFlowAnimation(tabIdToDetach);
4962
- await sleep2(200);
5005
+ await sleep3(200);
4963
5006
  await chrome.debugger.detach({ tabId: tabIdToDetach });
4964
5007
  this.tabIdOfDebuggerAttached = null;
4965
5008
  }
4966
5009
  async enableWaterFlowAnimation() {
5010
+ if (this.forceSameTabNavigation) {
5011
+ await chrome.debugger.sendCommand(
5012
+ { tabId: this.tabIdOfDebuggerAttached },
5013
+ "Runtime.evaluate",
5014
+ {
5015
+ expression: limitOpenNewTabScript
5016
+ }
5017
+ );
5018
+ }
4967
5019
  const script = await injectWaterFlowAnimation();
4968
5020
  await chrome.debugger.sendCommand(
4969
5021
  { tabId: this.tabIdOfDebuggerAttached },
@@ -5149,8 +5201,10 @@ var ChromeExtensionProxyPage = class {
5149
5201
  type: "keyUp",
5150
5202
  commands: ["selectAll"]
5151
5203
  });
5152
- await sleep2(100);
5153
- await this.keyboard.press("Backspace");
5204
+ await sleep3(100);
5205
+ await this.keyboard.press({
5206
+ key: "Backspace"
5207
+ });
5154
5208
  }
5155
5209
  async destroy() {
5156
5210
  this.activeTabId = null;