@midscene/web 0.11.1 → 0.11.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (36):
  1. package/dist/es/appium.js +84 -39
  2. package/dist/es/bridge-mode-browser.js +58 -19
  3. package/dist/es/bridge-mode.js +61 -43
  4. package/dist/es/chrome-extension.js +104 -51
  5. package/dist/es/index.js +291 -218
  6. package/dist/es/midscene-playground.js +56 -38
  7. package/dist/es/playground.js +56 -38
  8. package/dist/es/playwright.js +251 -203
  9. package/dist/es/puppeteer.js +85 -60
  10. package/dist/es/ui-utils.js +43 -0
  11. package/dist/lib/appium.js +84 -39
  12. package/dist/lib/bridge-mode-browser.js +58 -19
  13. package/dist/lib/bridge-mode.js +61 -43
  14. package/dist/lib/chrome-extension.js +104 -51
  15. package/dist/lib/index.js +291 -218
  16. package/dist/lib/midscene-playground.js +56 -38
  17. package/dist/lib/playground.js +56 -38
  18. package/dist/lib/playwright.js +251 -203
  19. package/dist/lib/puppeteer.js +85 -60
  20. package/dist/lib/ui-utils.js +43 -0
  21. package/dist/types/{tasks-d5a01262.d.ts → agent-ac363fa3.d.ts} +41 -41
  22. package/dist/types/appium.d.ts +2 -2
  23. package/dist/types/bridge-mode-browser.d.ts +2 -2
  24. package/dist/types/bridge-mode.d.ts +3 -3
  25. package/dist/types/{browser-7d5614fb.d.ts → browser-eae1a5c1.d.ts} +4 -4
  26. package/dist/types/chrome-extension.d.ts +3 -3
  27. package/dist/types/index.d.ts +8 -8
  28. package/dist/types/{page-77af8d5f.d.ts → page-cf0f892e.d.ts} +32 -6
  29. package/dist/types/playground.d.ts +3 -3
  30. package/dist/types/playwright.d.ts +12 -6
  31. package/dist/types/puppeteer.d.ts +2 -2
  32. package/dist/types/ui-utils.d.ts +6 -1
  33. package/dist/types/{utils-1a3bc661.d.ts → utils-9a29bfa0.d.ts} +1 -1
  34. package/dist/types/utils.d.ts +1 -1
  35. package/dist/types/yaml.d.ts +3 -3
  36. package/package.json +6 -6
@@ -3497,7 +3497,9 @@ var TaskCache = class {
3497
3497
  if (!this.midscenePkgInfo) {
3498
3498
  return void 0;
3499
3499
  }
3500
- if (jsonData.pkgName !== this.midscenePkgInfo.name || jsonData.pkgVersion !== this.midscenePkgInfo.version) {
3500
+ const jsonDataPkgVersion = jsonData.pkgVersion.split(".");
3501
+ const midscenePkgInfoPkgVersion = this.midscenePkgInfo.version.split(".");
3502
+ if (jsonDataPkgVersion[0] !== midscenePkgInfoPkgVersion[0] || jsonDataPkgVersion[1] !== midscenePkgInfoPkgVersion[1]) {
3501
3503
  return void 0;
3502
3504
  }
3503
3505
  return jsonData;
@@ -3531,6 +3533,79 @@ var TaskCache = class {
3531
3533
  }
3532
3534
  };
3533
3535
 
3536
+ // src/common/ui-utils.ts
3537
+ function typeStr(task) {
3538
+ return task.subType ? `${task.type} / ${task.subType || ""}` : task.type;
3539
+ }
3540
+ function getKeyCommands(value) {
3541
+ const keys = Array.isArray(value) ? value : [value];
3542
+ return keys.reduce((acc, k) => {
3543
+ const includeMeta = keys.includes("Meta") || keys.includes("Control");
3544
+ if (includeMeta && (k === "a" || k === "A")) {
3545
+ return acc.concat([{ key: k, command: "SelectAll" }]);
3546
+ }
3547
+ if (includeMeta && (k === "c" || k === "C")) {
3548
+ return acc.concat([{ key: k, command: "Copy" }]);
3549
+ }
3550
+ if (includeMeta && (k === "v" || k === "V")) {
3551
+ return acc.concat([{ key: k, command: "Paste" }]);
3552
+ }
3553
+ return acc.concat([{ key: k }]);
3554
+ }, []);
3555
+ }
3556
+ function paramStr(task) {
3557
+ var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k;
3558
+ let value;
3559
+ if (task.type === "Planning") {
3560
+ value = (_a = task == null ? void 0 : task.param) == null ? void 0 : _a.userPrompt;
3561
+ }
3562
+ if (task.type === "Insight") {
3563
+ value = ((_b = task == null ? void 0 : task.param) == null ? void 0 : _b.prompt) || ((_c = task == null ? void 0 : task.param) == null ? void 0 : _c.id) || ((_d = task == null ? void 0 : task.param) == null ? void 0 : _d.dataDemand) || ((_e = task == null ? void 0 : task.param) == null ? void 0 : _e.assertion);
3564
+ }
3565
+ if (task.type === "Action") {
3566
+ const sleepMs = (_f = task == null ? void 0 : task.param) == null ? void 0 : _f.timeMs;
3567
+ const scrollType = (_g = task == null ? void 0 : task.param) == null ? void 0 : _g.scrollType;
3568
+ if (sleepMs) {
3569
+ value = `${sleepMs}ms`;
3570
+ } else if (scrollType) {
3571
+ const scrollDirection = (_h = task == null ? void 0 : task.param) == null ? void 0 : _h.direction;
3572
+ const scrollDistance = (_i = task == null ? void 0 : task.param) == null ? void 0 : _i.distance;
3573
+ value = `${scrollDirection || "down"}, ${scrollType || "once"}, ${scrollDistance || "distance-not-set"}`;
3574
+ } else {
3575
+ value = ((_j = task == null ? void 0 : task.param) == null ? void 0 : _j.value) || ((_k = task == null ? void 0 : task.param) == null ? void 0 : _k.scrollType);
3576
+ }
3577
+ if (!value) {
3578
+ value = task.thought;
3579
+ }
3580
+ }
3581
+ if (typeof value === "undefined")
3582
+ return "";
3583
+ return typeof value === "string" ? value : JSON.stringify(value, void 0, 2);
3584
+ }
3585
+ var limitOpenNewTabScript = `
3586
+ if (!window.__MIDSCENE_NEW_TAB_INTERCEPTOR_INITIALIZED__) {
3587
+ window.__MIDSCENE_NEW_TAB_INTERCEPTOR_INITIALIZED__ = true;
3588
+
3589
+ // Intercept the window.open method (only once)
3590
+ window.open = function(url) {
3591
+ console.log('Blocked window.open:', url);
3592
+ window.location.href = url;
3593
+ return null;
3594
+ };
3595
+
3596
+ // Block all a tag clicks with target="_blank" (only once)
3597
+ document.addEventListener('click', function(e) {
3598
+ const target = e.target.closest('a');
3599
+ if (target && target.target === '_blank') {
3600
+ e.preventDefault();
3601
+ console.log('Blocked new tab:', target.href);
3602
+ window.location.href = target.href;
3603
+ target.removeAttribute('target');
3604
+ }
3605
+ }, true);
3606
+ }
3607
+ `;
3608
+
3534
3609
  // src/common/tasks.ts
3535
3610
  var PageTaskExecutor = class {
3536
3611
  constructor(page, insight, opts) {
@@ -3744,8 +3819,8 @@ var PageTaskExecutor = class {
3744
3819
  thought: plan2.thought,
3745
3820
  locate: plan2.locate,
3746
3821
  executor: async (taskParam) => {
3747
- (0, import_node_assert4.default)(taskParam == null ? void 0 : taskParam.value, "No key to press");
3748
- await this.page.keyboard.press(taskParam.value);
3822
+ const keys = getKeyCommands(taskParam.value);
3823
+ await this.page.keyboard.press(keys);
3749
3824
  }
3750
3825
  };
3751
3826
  tasks.push(taskActionKeyboardPress);
@@ -4153,7 +4228,7 @@ var PageTaskExecutor = class {
4153
4228
  const cacheGroup = this.taskCache.getCacheGroupByPrompt(userPrompt);
4154
4229
  const isCompleted = false;
4155
4230
  let currentActionNumber = 0;
4156
- const maxActionNumber = 20;
4231
+ const maxActionNumber = 40;
4157
4232
  while (!isCompleted && currentActionNumber < maxActionNumber) {
4158
4233
  currentActionNumber++;
4159
4234
  const planningTask = this.planningTaskToGoal(
@@ -4349,40 +4424,6 @@ var PageTaskExecutor = class {
4349
4424
  }
4350
4425
  };
4351
4426
 
4352
- // src/common/ui-utils.ts
4353
- function typeStr(task) {
4354
- return task.subType ? `${task.type} / ${task.subType || ""}` : task.type;
4355
- }
4356
- function paramStr(task) {
4357
- var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k;
4358
- let value;
4359
- if (task.type === "Planning") {
4360
- value = (_a = task == null ? void 0 : task.param) == null ? void 0 : _a.userPrompt;
4361
- }
4362
- if (task.type === "Insight") {
4363
- value = ((_b = task == null ? void 0 : task.param) == null ? void 0 : _b.prompt) || ((_c = task == null ? void 0 : task.param) == null ? void 0 : _c.id) || ((_d = task == null ? void 0 : task.param) == null ? void 0 : _d.dataDemand) || ((_e = task == null ? void 0 : task.param) == null ? void 0 : _e.assertion);
4364
- }
4365
- if (task.type === "Action") {
4366
- const sleepMs = (_f = task == null ? void 0 : task.param) == null ? void 0 : _f.timeMs;
4367
- const scrollType = (_g = task == null ? void 0 : task.param) == null ? void 0 : _g.scrollType;
4368
- if (sleepMs) {
4369
- value = `${sleepMs}ms`;
4370
- } else if (scrollType) {
4371
- const scrollDirection = (_h = task == null ? void 0 : task.param) == null ? void 0 : _h.direction;
4372
- const scrollDistance = (_i = task == null ? void 0 : task.param) == null ? void 0 : _i.distance;
4373
- value = `${scrollDirection || "down"}, ${scrollType || "once"}, ${scrollDistance || "distance-not-set"}`;
4374
- } else {
4375
- value = ((_j = task == null ? void 0 : task.param) == null ? void 0 : _j.value) || ((_k = task == null ? void 0 : task.param) == null ? void 0 : _k.scrollType);
4376
- }
4377
- if (!value) {
4378
- value = task.thought;
4379
- }
4380
- }
4381
- if (typeof value === "undefined")
4382
- return "";
4383
- return typeof value === "string" ? value : JSON.stringify(value, void 0, 2);
4384
- }
4385
-
4386
4427
  // src/common/agent.ts
4387
4428
  var PageAgent = class {
4388
4429
  constructor(page, opts) {
@@ -4786,13 +4827,13 @@ var injectStopWaterFlowAnimation = async () => {
4786
4827
  };
4787
4828
 
4788
4829
  // src/chrome-extension/page.ts
4789
- function sleep2(ms) {
4830
+ function sleep3(ms) {
4790
4831
  return new Promise((resolve) => setTimeout(resolve, ms));
4791
4832
  }
4792
4833
  var ChromeExtensionProxyPage = class {
4793
- constructor(trackingActiveTab) {
4834
+ constructor(forceSameTabNavigation) {
4794
4835
  this.pageType = "chrome-extension-proxy";
4795
- this.version = "0.11.1";
4836
+ this.version = "0.11.2";
4796
4837
  this.activeTabId = null;
4797
4838
  this.tabIdOfDebuggerAttached = null;
4798
4839
  this.attachingDebugger = null;
@@ -4861,23 +4902,24 @@ var ChromeExtensionProxyPage = class {
4861
4902
  });
4862
4903
  await cdpKeyboard.type(text, { delay: 0 });
4863
4904
  },
4864
- press: async (key) => {
4905
+ press: async (action) => {
4865
4906
  const cdpKeyboard = new CdpKeyboard({
4866
4907
  send: this.sendCommandToDebugger.bind(this)
4867
4908
  });
4868
- const keys = Array.isArray(key) ? key : [key];
4909
+ const keys = Array.isArray(action) ? action : [action];
4869
4910
  for (const k of keys) {
4870
- await cdpKeyboard.down(k);
4911
+ const commands = k.command ? [k.command] : [];
4912
+ await cdpKeyboard.down(k.key, { commands });
4871
4913
  }
4872
4914
  for (const k of [...keys].reverse()) {
4873
- await cdpKeyboard.up(k);
4915
+ await cdpKeyboard.up(k.key);
4874
4916
  }
4875
4917
  }
4876
4918
  };
4877
- this.trackingActiveTab = trackingActiveTab;
4919
+ this.forceSameTabNavigation = forceSameTabNavigation;
4878
4920
  }
4879
4921
  async getTabId() {
4880
- if (this.activeTabId && !this.trackingActiveTab) {
4922
+ if (this.activeTabId && !this.forceSameTabNavigation) {
4881
4923
  return this.activeTabId;
4882
4924
  }
4883
4925
  const tabId = await chrome.tabs.query({ active: true, currentWindow: true }).then((tabs) => {
@@ -4919,7 +4961,7 @@ var ChromeExtensionProxyPage = class {
4919
4961
  }
4920
4962
  }
4921
4963
  await chrome.debugger.attach({ tabId: currentTabId }, "1.3");
4922
- await sleep2(500);
4964
+ await sleep3(500);
4923
4965
  this.tabIdOfDebuggerAttached = currentTabId;
4924
4966
  await this.enableWaterFlowAnimation();
4925
4967
  } catch (error) {
@@ -4959,11 +5001,20 @@ var ChromeExtensionProxyPage = class {
4959
5001
  return;
4960
5002
  }
4961
5003
  await this.disableWaterFlowAnimation(tabIdToDetach);
4962
- await sleep2(200);
5004
+ await sleep3(200);
4963
5005
  await chrome.debugger.detach({ tabId: tabIdToDetach });
4964
5006
  this.tabIdOfDebuggerAttached = null;
4965
5007
  }
4966
5008
  async enableWaterFlowAnimation() {
5009
+ if (this.forceSameTabNavigation) {
5010
+ await chrome.debugger.sendCommand(
5011
+ { tabId: this.tabIdOfDebuggerAttached },
5012
+ "Runtime.evaluate",
5013
+ {
5014
+ expression: limitOpenNewTabScript
5015
+ }
5016
+ );
5017
+ }
4967
5018
  const script = await injectWaterFlowAnimation();
4968
5019
  await chrome.debugger.sendCommand(
4969
5020
  { tabId: this.tabIdOfDebuggerAttached },
@@ -5149,8 +5200,10 @@ var ChromeExtensionProxyPage = class {
5149
5200
  type: "keyUp",
5150
5201
  commands: ["selectAll"]
5151
5202
  });
5152
- await sleep2(100);
5153
- await this.keyboard.press("Backspace");
5203
+ await sleep3(100);
5204
+ await this.keyboard.press({
5205
+ key: "Backspace"
5206
+ });
5154
5207
  }
5155
5208
  async destroy() {
5156
5209
  this.activeTabId = null;