@midscene/web 0.11.1 → 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/dist/es/appium.js +84 -39
  2. package/dist/es/bridge-mode-browser.js +58 -19
  3. package/dist/es/bridge-mode.js +61 -43
  4. package/dist/es/chrome-extension.js +104 -51
  5. package/dist/es/index.js +291 -218
  6. package/dist/es/midscene-playground.js +56 -38
  7. package/dist/es/playground.js +56 -38
  8. package/dist/es/playwright.js +251 -203
  9. package/dist/es/puppeteer.js +85 -60
  10. package/dist/es/ui-utils.js +43 -0
  11. package/dist/lib/appium.js +84 -39
  12. package/dist/lib/bridge-mode-browser.js +58 -19
  13. package/dist/lib/bridge-mode.js +61 -43
  14. package/dist/lib/chrome-extension.js +104 -51
  15. package/dist/lib/index.js +291 -218
  16. package/dist/lib/midscene-playground.js +56 -38
  17. package/dist/lib/playground.js +56 -38
  18. package/dist/lib/playwright.js +251 -203
  19. package/dist/lib/puppeteer.js +85 -60
  20. package/dist/lib/ui-utils.js +43 -0
  21. package/dist/types/{tasks-d5a01262.d.ts → agent-ac363fa3.d.ts} +41 -41
  22. package/dist/types/appium.d.ts +2 -2
  23. package/dist/types/bridge-mode-browser.d.ts +2 -2
  24. package/dist/types/bridge-mode.d.ts +3 -3
  25. package/dist/types/{browser-7d5614fb.d.ts → browser-eae1a5c1.d.ts} +4 -4
  26. package/dist/types/chrome-extension.d.ts +3 -3
  27. package/dist/types/index.d.ts +8 -8
  28. package/dist/types/{page-77af8d5f.d.ts → page-cf0f892e.d.ts} +32 -6
  29. package/dist/types/playground.d.ts +3 -3
  30. package/dist/types/playwright.d.ts +12 -6
  31. package/dist/types/puppeteer.d.ts +2 -2
  32. package/dist/types/ui-utils.d.ts +6 -1
  33. package/dist/types/{utils-1a3bc661.d.ts → utils-9a29bfa0.d.ts} +1 -1
  34. package/dist/types/utils.d.ts +1 -1
  35. package/dist/types/yaml.d.ts +3 -3
  36. package/package.json +6 -6
@@ -3364,7 +3364,9 @@ var TaskCache = class {
3364
3364
  if (!this.midscenePkgInfo) {
3365
3365
  return void 0;
3366
3366
  }
3367
- if (jsonData.pkgName !== this.midscenePkgInfo.name || jsonData.pkgVersion !== this.midscenePkgInfo.version) {
3367
+ const jsonDataPkgVersion = jsonData.pkgVersion.split(".");
3368
+ const midscenePkgInfoPkgVersion = this.midscenePkgInfo.version.split(".");
3369
+ if (jsonDataPkgVersion[0] !== midscenePkgInfoPkgVersion[0] || jsonDataPkgVersion[1] !== midscenePkgInfoPkgVersion[1]) {
3368
3370
  return void 0;
3369
3371
  }
3370
3372
  return jsonData;
@@ -3398,6 +3400,56 @@ var TaskCache = class {
3398
3400
  }
3399
3401
  };
3400
3402
 
3403
+ // src/common/ui-utils.ts
3404
+ function typeStr(task) {
3405
+ return task.subType ? `${task.type} / ${task.subType || ""}` : task.type;
3406
+ }
3407
+ function getKeyCommands(value) {
3408
+ const keys = Array.isArray(value) ? value : [value];
3409
+ return keys.reduce((acc, k) => {
3410
+ const includeMeta = keys.includes("Meta") || keys.includes("Control");
3411
+ if (includeMeta && (k === "a" || k === "A")) {
3412
+ return acc.concat([{ key: k, command: "SelectAll" }]);
3413
+ }
3414
+ if (includeMeta && (k === "c" || k === "C")) {
3415
+ return acc.concat([{ key: k, command: "Copy" }]);
3416
+ }
3417
+ if (includeMeta && (k === "v" || k === "V")) {
3418
+ return acc.concat([{ key: k, command: "Paste" }]);
3419
+ }
3420
+ return acc.concat([{ key: k }]);
3421
+ }, []);
3422
+ }
3423
+ function paramStr(task) {
3424
+ var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k;
3425
+ let value;
3426
+ if (task.type === "Planning") {
3427
+ value = (_a = task == null ? void 0 : task.param) == null ? void 0 : _a.userPrompt;
3428
+ }
3429
+ if (task.type === "Insight") {
3430
+ value = ((_b = task == null ? void 0 : task.param) == null ? void 0 : _b.prompt) || ((_c = task == null ? void 0 : task.param) == null ? void 0 : _c.id) || ((_d = task == null ? void 0 : task.param) == null ? void 0 : _d.dataDemand) || ((_e = task == null ? void 0 : task.param) == null ? void 0 : _e.assertion);
3431
+ }
3432
+ if (task.type === "Action") {
3433
+ const sleepMs = (_f = task == null ? void 0 : task.param) == null ? void 0 : _f.timeMs;
3434
+ const scrollType = (_g = task == null ? void 0 : task.param) == null ? void 0 : _g.scrollType;
3435
+ if (sleepMs) {
3436
+ value = `${sleepMs}ms`;
3437
+ } else if (scrollType) {
3438
+ const scrollDirection = (_h = task == null ? void 0 : task.param) == null ? void 0 : _h.direction;
3439
+ const scrollDistance = (_i = task == null ? void 0 : task.param) == null ? void 0 : _i.distance;
3440
+ value = `${scrollDirection || "down"}, ${scrollType || "once"}, ${scrollDistance || "distance-not-set"}`;
3441
+ } else {
3442
+ value = ((_j = task == null ? void 0 : task.param) == null ? void 0 : _j.value) || ((_k = task == null ? void 0 : task.param) == null ? void 0 : _k.scrollType);
3443
+ }
3444
+ if (!value) {
3445
+ value = task.thought;
3446
+ }
3447
+ }
3448
+ if (typeof value === "undefined")
3449
+ return "";
3450
+ return typeof value === "string" ? value : JSON.stringify(value, void 0, 2);
3451
+ }
3452
+
3401
3453
  // src/common/tasks.ts
3402
3454
  var PageTaskExecutor = class {
3403
3455
  constructor(page, insight, opts) {
@@ -3611,8 +3663,8 @@ var PageTaskExecutor = class {
3611
3663
  thought: plan2.thought,
3612
3664
  locate: plan2.locate,
3613
3665
  executor: async (taskParam) => {
3614
- (0, import_node_assert3.default)(taskParam == null ? void 0 : taskParam.value, "No key to press");
3615
- await this.page.keyboard.press(taskParam.value);
3666
+ const keys = getKeyCommands(taskParam.value);
3667
+ await this.page.keyboard.press(keys);
3616
3668
  }
3617
3669
  };
3618
3670
  tasks.push(taskActionKeyboardPress);
@@ -4020,7 +4072,7 @@ var PageTaskExecutor = class {
4020
4072
  const cacheGroup = this.taskCache.getCacheGroupByPrompt(userPrompt);
4021
4073
  const isCompleted = false;
4022
4074
  let currentActionNumber = 0;
4023
- const maxActionNumber = 20;
4075
+ const maxActionNumber = 40;
4024
4076
  while (!isCompleted && currentActionNumber < maxActionNumber) {
4025
4077
  currentActionNumber++;
4026
4078
  const planningTask = this.planningTaskToGoal(
@@ -4240,40 +4292,6 @@ var WebElementInfo = class {
4240
4292
  }
4241
4293
  };
4242
4294
 
4243
- // src/common/ui-utils.ts
4244
- function typeStr(task) {
4245
- return task.subType ? `${task.type} / ${task.subType || ""}` : task.type;
4246
- }
4247
- function paramStr(task) {
4248
- var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k;
4249
- let value;
4250
- if (task.type === "Planning") {
4251
- value = (_a = task == null ? void 0 : task.param) == null ? void 0 : _a.userPrompt;
4252
- }
4253
- if (task.type === "Insight") {
4254
- value = ((_b = task == null ? void 0 : task.param) == null ? void 0 : _b.prompt) || ((_c = task == null ? void 0 : task.param) == null ? void 0 : _c.id) || ((_d = task == null ? void 0 : task.param) == null ? void 0 : _d.dataDemand) || ((_e = task == null ? void 0 : task.param) == null ? void 0 : _e.assertion);
4255
- }
4256
- if (task.type === "Action") {
4257
- const sleepMs = (_f = task == null ? void 0 : task.param) == null ? void 0 : _f.timeMs;
4258
- const scrollType = (_g = task == null ? void 0 : task.param) == null ? void 0 : _g.scrollType;
4259
- if (sleepMs) {
4260
- value = `${sleepMs}ms`;
4261
- } else if (scrollType) {
4262
- const scrollDirection = (_h = task == null ? void 0 : task.param) == null ? void 0 : _h.direction;
4263
- const scrollDistance = (_i = task == null ? void 0 : task.param) == null ? void 0 : _i.distance;
4264
- value = `${scrollDirection || "down"}, ${scrollType || "once"}, ${scrollDistance || "distance-not-set"}`;
4265
- } else {
4266
- value = ((_j = task == null ? void 0 : task.param) == null ? void 0 : _j.value) || ((_k = task == null ? void 0 : task.param) == null ? void 0 : _k.scrollType);
4267
- }
4268
- if (!value) {
4269
- value = task.thought;
4270
- }
4271
- }
4272
- if (typeof value === "undefined")
4273
- return "";
4274
- return typeof value === "string" ? value : JSON.stringify(value, void 0, 2);
4275
- }
4276
-
4277
4295
  // src/common/utils.ts
4278
4296
  var import_node_assert4 = __toESM(require("assert"));
4279
4297
  var import_node_fs3 = require("fs");
@@ -4581,12 +4599,19 @@ var Page = class {
4581
4599
  this.underlyingPage = underlyingPage;
4582
4600
  this.pageType = pageType;
4583
4601
  }
4602
+ async waitForNavigation() {
4603
+ if (this.pageType === "puppeteer" || this.pageType === "playwright") {
4604
+ await this.underlyingPage.waitForSelector("html");
4605
+ }
4606
+ }
4584
4607
  // @deprecated
4585
4608
  async getElementsInfo() {
4609
+ await this.waitForNavigation();
4586
4610
  const tree = await this.getElementsNodeTree();
4587
4611
  return (0, import_extractor2.treeToList)(tree);
4588
4612
  }
4589
4613
  async getElementsNodeTree() {
4614
+ await this.waitForNavigation();
4590
4615
  const scripts = await getExtraReturnLogic(true);
4591
4616
  const captureElementSnapshot = await this.evaluate(scripts);
4592
4617
  return captureElementSnapshot;
@@ -4607,6 +4632,7 @@ var Page = class {
4607
4632
  async screenshotBase64() {
4608
4633
  const imgType = "jpeg";
4609
4634
  const path2 = (0, import_utils9.getTmpFile)(imgType);
4635
+ await this.waitForNavigation();
4610
4636
  await this.underlyingPage.screenshot({
4611
4637
  path: path2,
4612
4638
  type: imgType,
@@ -4664,13 +4690,14 @@ var Page = class {
4664
4690
  get keyboard() {
4665
4691
  return {
4666
4692
  type: async (text) => this.underlyingPage.keyboard.type(text, { delay: 80 }),
4667
- press: async (key) => {
4668
- const keys = Array.isArray(key) ? key : [key];
4669
- for (const key2 of keys) {
4670
- await this.underlyingPage.keyboard.down(key2);
4693
+ press: async (action) => {
4694
+ const keys = Array.isArray(action) ? action : [action];
4695
+ for (const k of keys) {
4696
+ const commands = k.command ? [k.command] : [];
4697
+ await this.underlyingPage.keyboard.down(k.key, { commands });
4671
4698
  }
4672
- for (const key2 of [...keys].reverse()) {
4673
- await this.underlyingPage.keyboard.up(key2);
4699
+ for (const k of [...keys].reverse()) {
4700
+ await this.underlyingPage.keyboard.up(k.key);
4674
4701
  }
4675
4702
  },
4676
4703
  down: async (key) => {
@@ -4705,7 +4732,7 @@ var Page = class {
4705
4732
  await this.underlyingPage.keyboard.up("Control");
4706
4733
  }
4707
4734
  await (0, import_utils9.sleep)(100);
4708
- await this.keyboard.press("Backspace");
4735
+ await this.keyboard.press([{ key: "Backspace" }]);
4709
4736
  }
4710
4737
  async moveToPoint(point) {
4711
4738
  if (point) {
@@ -4893,7 +4920,7 @@ async function puppeteerAgentForTarget(target, preference) {
4893
4920
  const agent = new PuppeteerAgent(page, {
4894
4921
  autoPrintReportMsg: false,
4895
4922
  testId: preference == null ? void 0 : preference.testId,
4896
- trackingActiveTab: typeof target.trackingActiveTab !== "undefined" ? target.trackingActiveTab : true
4923
+ forceSameTabNavigation: typeof target.forceSameTabNavigation !== "undefined" ? target.forceSameTabNavigation : true
4897
4924
  // true for default in yaml script
4898
4925
  });
4899
4926
  freeFn.push({
@@ -4908,21 +4935,19 @@ var PuppeteerAgent = class extends PageAgent {
4908
4935
  constructor(page, opts) {
4909
4936
  const webPage = new WebPage(page);
4910
4937
  super(webPage, opts);
4911
- if (opts == null ? void 0 : opts.trackingActiveTab) {
4912
- const browser = this.page.underlyingPage.browser();
4913
- browser.on("targetcreated", async (target) => {
4914
- if (target.type() === "page") {
4915
- const targetPage = await target.page();
4916
- if (!targetPage) {
4917
- console.warn(
4918
- "got a targetPage event, but the page is not ready yet, skip"
4919
- );
4920
- return;
4921
- }
4922
- const midscenePage = new WebPage(targetPage);
4923
- this.page = midscenePage;
4924
- this.taskExecutor.page = midscenePage;
4938
+ const { forceSameTabNavigation = true } = opts != null ? opts : {};
4939
+ if (forceSameTabNavigation) {
4940
+ page.on("popup", async (popup) => {
4941
+ if (!popup) {
4942
+ console.warn(
4943
+ "got a popup event, but the popup is not ready yet, skip"
4944
+ );
4945
+ return;
4925
4946
  }
4947
+ const url = await popup.url();
4948
+ console.log(`Popup opened: ${url}`);
4949
+ await popup.close();
4950
+ await page.goto(url);
4926
4951
  });
4927
4952
  }
4928
4953
  }
@@ -20,6 +20,8 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
20
20
  // src/common/ui-utils.ts
21
21
  var ui_utils_exports = {};
22
22
  __export(ui_utils_exports, {
23
+ getKeyCommands: () => getKeyCommands,
24
+ limitOpenNewTabScript: () => limitOpenNewTabScript,
23
25
  paramStr: () => paramStr,
24
26
  typeStr: () => typeStr
25
27
  });
@@ -27,6 +29,22 @@ module.exports = __toCommonJS(ui_utils_exports);
27
29
  function typeStr(task) {
28
30
  return task.subType ? `${task.type} / ${task.subType || ""}` : task.type;
29
31
  }
32
+ function getKeyCommands(value) {
33
+ const keys = Array.isArray(value) ? value : [value];
34
+ return keys.reduce((acc, k) => {
35
+ const includeMeta = keys.includes("Meta") || keys.includes("Control");
36
+ if (includeMeta && (k === "a" || k === "A")) {
37
+ return acc.concat([{ key: k, command: "SelectAll" }]);
38
+ }
39
+ if (includeMeta && (k === "c" || k === "C")) {
40
+ return acc.concat([{ key: k, command: "Copy" }]);
41
+ }
42
+ if (includeMeta && (k === "v" || k === "V")) {
43
+ return acc.concat([{ key: k, command: "Paste" }]);
44
+ }
45
+ return acc.concat([{ key: k }]);
46
+ }, []);
47
+ }
30
48
  function paramStr(task) {
31
49
  var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k;
32
50
  let value;
@@ -56,8 +74,33 @@ function paramStr(task) {
56
74
  return "";
57
75
  return typeof value === "string" ? value : JSON.stringify(value, void 0, 2);
58
76
  }
77
+ var limitOpenNewTabScript = `
78
+ if (!window.__MIDSCENE_NEW_TAB_INTERCEPTOR_INITIALIZED__) {
79
+ window.__MIDSCENE_NEW_TAB_INTERCEPTOR_INITIALIZED__ = true;
80
+
81
+ // Intercept the window.open method (only once)
82
+ window.open = function(url) {
83
+ console.log('Blocked window.open:', url);
84
+ window.location.href = url;
85
+ return null;
86
+ };
87
+
88
+ // Block all a tag clicks with target="_blank" (only once)
89
+ document.addEventListener('click', function(e) {
90
+ const target = e.target.closest('a');
91
+ if (target && target.target === '_blank') {
92
+ e.preventDefault();
93
+ console.log('Blocked new tab:', target.href);
94
+ window.location.href = target.href;
95
+ target.removeAttribute('target');
96
+ }
97
+ }, true);
98
+ }
99
+ `;
59
100
  // Annotate the CommonJS export names for ESM import in node:
60
101
  0 && (module.exports = {
102
+ getKeyCommands,
103
+ limitOpenNewTabScript,
61
104
  paramStr,
62
105
  typeStr
63
106
  });
@@ -3362,7 +3362,9 @@ var TaskCache = class {
3362
3362
  if (!this.midscenePkgInfo) {
3363
3363
  return void 0;
3364
3364
  }
3365
- if (jsonData.pkgName !== this.midscenePkgInfo.name || jsonData.pkgVersion !== this.midscenePkgInfo.version) {
3365
+ const jsonDataPkgVersion = jsonData.pkgVersion.split(".");
3366
+ const midscenePkgInfoPkgVersion = this.midscenePkgInfo.version.split(".");
3367
+ if (jsonDataPkgVersion[0] !== midscenePkgInfoPkgVersion[0] || jsonDataPkgVersion[1] !== midscenePkgInfoPkgVersion[1]) {
3366
3368
  return void 0;
3367
3369
  }
3368
3370
  return jsonData;
@@ -3396,6 +3398,56 @@ var TaskCache = class {
3396
3398
  }
3397
3399
  };
3398
3400
 
3401
+ // src/common/ui-utils.ts
3402
+ function typeStr(task) {
3403
+ return task.subType ? `${task.type} / ${task.subType || ""}` : task.type;
3404
+ }
3405
+ function getKeyCommands(value) {
3406
+ const keys = Array.isArray(value) ? value : [value];
3407
+ return keys.reduce((acc, k) => {
3408
+ const includeMeta = keys.includes("Meta") || keys.includes("Control");
3409
+ if (includeMeta && (k === "a" || k === "A")) {
3410
+ return acc.concat([{ key: k, command: "SelectAll" }]);
3411
+ }
3412
+ if (includeMeta && (k === "c" || k === "C")) {
3413
+ return acc.concat([{ key: k, command: "Copy" }]);
3414
+ }
3415
+ if (includeMeta && (k === "v" || k === "V")) {
3416
+ return acc.concat([{ key: k, command: "Paste" }]);
3417
+ }
3418
+ return acc.concat([{ key: k }]);
3419
+ }, []);
3420
+ }
3421
+ function paramStr(task) {
3422
+ var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k;
3423
+ let value;
3424
+ if (task.type === "Planning") {
3425
+ value = (_a = task == null ? void 0 : task.param) == null ? void 0 : _a.userPrompt;
3426
+ }
3427
+ if (task.type === "Insight") {
3428
+ value = ((_b = task == null ? void 0 : task.param) == null ? void 0 : _b.prompt) || ((_c = task == null ? void 0 : task.param) == null ? void 0 : _c.id) || ((_d = task == null ? void 0 : task.param) == null ? void 0 : _d.dataDemand) || ((_e = task == null ? void 0 : task.param) == null ? void 0 : _e.assertion);
3429
+ }
3430
+ if (task.type === "Action") {
3431
+ const sleepMs = (_f = task == null ? void 0 : task.param) == null ? void 0 : _f.timeMs;
3432
+ const scrollType = (_g = task == null ? void 0 : task.param) == null ? void 0 : _g.scrollType;
3433
+ if (sleepMs) {
3434
+ value = `${sleepMs}ms`;
3435
+ } else if (scrollType) {
3436
+ const scrollDirection = (_h = task == null ? void 0 : task.param) == null ? void 0 : _h.direction;
3437
+ const scrollDistance = (_i = task == null ? void 0 : task.param) == null ? void 0 : _i.distance;
3438
+ value = `${scrollDirection || "down"}, ${scrollType || "once"}, ${scrollDistance || "distance-not-set"}`;
3439
+ } else {
3440
+ value = ((_j = task == null ? void 0 : task.param) == null ? void 0 : _j.value) || ((_k = task == null ? void 0 : task.param) == null ? void 0 : _k.scrollType);
3441
+ }
3442
+ if (!value) {
3443
+ value = task.thought;
3444
+ }
3445
+ }
3446
+ if (typeof value === "undefined")
3447
+ return "";
3448
+ return typeof value === "string" ? value : JSON.stringify(value, void 0, 2);
3449
+ }
3450
+
3399
3451
  // src/common/tasks.ts
3400
3452
  var PageTaskExecutor = class {
3401
3453
  constructor(page, insight, opts) {
@@ -3609,8 +3661,8 @@ var PageTaskExecutor = class {
3609
3661
  thought: plan2.thought,
3610
3662
  locate: plan2.locate,
3611
3663
  executor: async (taskParam) => {
3612
- (0, import_node_assert3.default)(taskParam == null ? void 0 : taskParam.value, "No key to press");
3613
- await this.page.keyboard.press(taskParam.value);
3664
+ const keys = getKeyCommands(taskParam.value);
3665
+ await this.page.keyboard.press(keys);
3614
3666
  }
3615
3667
  };
3616
3668
  tasks.push(taskActionKeyboardPress);
@@ -4018,7 +4070,7 @@ var PageTaskExecutor = class {
4018
4070
  const cacheGroup = this.taskCache.getCacheGroupByPrompt(userPrompt);
4019
4071
  const isCompleted = false;
4020
4072
  let currentActionNumber = 0;
4021
- const maxActionNumber = 20;
4073
+ const maxActionNumber = 40;
4022
4074
  while (!isCompleted && currentActionNumber < maxActionNumber) {
4023
4075
  currentActionNumber++;
4024
4076
  const planningTask = this.planningTaskToGoal(
@@ -4238,40 +4290,6 @@ var WebElementInfo = class {
4238
4290
  }
4239
4291
  };
4240
4292
 
4241
- // src/common/ui-utils.ts
4242
- function typeStr(task) {
4243
- return task.subType ? `${task.type} / ${task.subType || ""}` : task.type;
4244
- }
4245
- function paramStr(task) {
4246
- var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k;
4247
- let value;
4248
- if (task.type === "Planning") {
4249
- value = (_a = task == null ? void 0 : task.param) == null ? void 0 : _a.userPrompt;
4250
- }
4251
- if (task.type === "Insight") {
4252
- value = ((_b = task == null ? void 0 : task.param) == null ? void 0 : _b.prompt) || ((_c = task == null ? void 0 : task.param) == null ? void 0 : _c.id) || ((_d = task == null ? void 0 : task.param) == null ? void 0 : _d.dataDemand) || ((_e = task == null ? void 0 : task.param) == null ? void 0 : _e.assertion);
4253
- }
4254
- if (task.type === "Action") {
4255
- const sleepMs = (_f = task == null ? void 0 : task.param) == null ? void 0 : _f.timeMs;
4256
- const scrollType = (_g = task == null ? void 0 : task.param) == null ? void 0 : _g.scrollType;
4257
- if (sleepMs) {
4258
- value = `${sleepMs}ms`;
4259
- } else if (scrollType) {
4260
- const scrollDirection = (_h = task == null ? void 0 : task.param) == null ? void 0 : _h.direction;
4261
- const scrollDistance = (_i = task == null ? void 0 : task.param) == null ? void 0 : _i.distance;
4262
- value = `${scrollDirection || "down"}, ${scrollType || "once"}, ${scrollDistance || "distance-not-set"}`;
4263
- } else {
4264
- value = ((_j = task == null ? void 0 : task.param) == null ? void 0 : _j.value) || ((_k = task == null ? void 0 : task.param) == null ? void 0 : _k.scrollType);
4265
- }
4266
- if (!value) {
4267
- value = task.thought;
4268
- }
4269
- }
4270
- if (typeof value === "undefined")
4271
- return "";
4272
- return typeof value === "string" ? value : JSON.stringify(value, void 0, 2);
4273
- }
4274
-
4275
4293
  // src/common/utils.ts
4276
4294
  var import_node_assert4 = __toESM(require("assert"));
4277
4295
  var import_node_fs3 = require("fs");
@@ -4607,7 +4625,7 @@ var Page = class {
4607
4625
  get keyboard() {
4608
4626
  return {
4609
4627
  type: (text) => this.keyboardType(text),
4610
- press: (key) => this.keyboardPress(key)
4628
+ press: (action) => this.keyboardPressAction(action)
4611
4629
  };
4612
4630
  }
4613
4631
  async clearInput(element) {
@@ -4721,6 +4739,33 @@ var Page = class {
4721
4739
  }
4722
4740
  ]);
4723
4741
  }
4742
+ async keyboardPressAction(action) {
4743
+ if (Array.isArray(action)) {
4744
+ for (const act of action) {
4745
+ await this.browser.performActions([
4746
+ {
4747
+ type: "key",
4748
+ id: "keyboard",
4749
+ actions: [
4750
+ { type: "keyDown", value: act.key },
4751
+ { type: "keyUp", value: act.key }
4752
+ ]
4753
+ }
4754
+ ]);
4755
+ }
4756
+ } else {
4757
+ await this.browser.performActions([
4758
+ {
4759
+ type: "key",
4760
+ id: "keyboard",
4761
+ actions: [
4762
+ { type: "keyDown", value: action.key },
4763
+ { type: "keyUp", value: action.key }
4764
+ ]
4765
+ }
4766
+ ]);
4767
+ }
4768
+ }
4724
4769
  async mouseClick(x, y, button = "left") {
4725
4770
  await this.mouseMove(x, y);
4726
4771
  await this.browser.performActions([
@@ -61,6 +61,33 @@ var import_node_assert4 = __toESM(require("assert"));
61
61
 
62
62
  // src/chrome-extension/page.ts
63
63
  var import_node_assert2 = __toESM(require("assert"));
64
+
65
+ // src/common/ui-utils.ts
66
+ var limitOpenNewTabScript = `
67
+ if (!window.__MIDSCENE_NEW_TAB_INTERCEPTOR_INITIALIZED__) {
68
+ window.__MIDSCENE_NEW_TAB_INTERCEPTOR_INITIALIZED__ = true;
69
+
70
+ // Intercept the window.open method (only once)
71
+ window.open = function(url) {
72
+ console.log('Blocked window.open:', url);
73
+ window.location.href = url;
74
+ return null;
75
+ };
76
+
77
+ // Block all a tag clicks with target="_blank" (only once)
78
+ document.addEventListener('click', function(e) {
79
+ const target = e.target.closest('a');
80
+ if (target && target.target === '_blank') {
81
+ e.preventDefault();
82
+ console.log('Blocked new tab:', target.href);
83
+ window.location.href = target.href;
84
+ target.removeAttribute('target');
85
+ }
86
+ }, true);
87
+ }
88
+ `;
89
+
90
+ // src/chrome-extension/page.ts
64
91
  var import_extractor = require("@midscene/shared/extractor");
65
92
 
66
93
  // src/chrome-extension/cdpInput.ts
@@ -262,9 +289,9 @@ function sleep(ms) {
262
289
  return new Promise((resolve) => setTimeout(resolve, ms));
263
290
  }
264
291
  var ChromeExtensionProxyPage = class {
265
- constructor(trackingActiveTab) {
292
+ constructor(forceSameTabNavigation) {
266
293
  this.pageType = "chrome-extension-proxy";
267
- this.version = "0.11.1";
294
+ this.version = "0.11.2";
268
295
  this.activeTabId = null;
269
296
  this.tabIdOfDebuggerAttached = null;
270
297
  this.attachingDebugger = null;
@@ -333,23 +360,24 @@ var ChromeExtensionProxyPage = class {
333
360
  });
334
361
  await cdpKeyboard.type(text, { delay: 0 });
335
362
  },
336
- press: async (key) => {
363
+ press: async (action) => {
337
364
  const cdpKeyboard = new CdpKeyboard({
338
365
  send: this.sendCommandToDebugger.bind(this)
339
366
  });
340
- const keys = Array.isArray(key) ? key : [key];
367
+ const keys = Array.isArray(action) ? action : [action];
341
368
  for (const k of keys) {
342
- await cdpKeyboard.down(k);
369
+ const commands = k.command ? [k.command] : [];
370
+ await cdpKeyboard.down(k.key, { commands });
343
371
  }
344
372
  for (const k of [...keys].reverse()) {
345
- await cdpKeyboard.up(k);
373
+ await cdpKeyboard.up(k.key);
346
374
  }
347
375
  }
348
376
  };
349
- this.trackingActiveTab = trackingActiveTab;
377
+ this.forceSameTabNavigation = forceSameTabNavigation;
350
378
  }
351
379
  async getTabId() {
352
- if (this.activeTabId && !this.trackingActiveTab) {
380
+ if (this.activeTabId && !this.forceSameTabNavigation) {
353
381
  return this.activeTabId;
354
382
  }
355
383
  const tabId = await chrome.tabs.query({ active: true, currentWindow: true }).then((tabs) => {
@@ -436,6 +464,15 @@ var ChromeExtensionProxyPage = class {
436
464
  this.tabIdOfDebuggerAttached = null;
437
465
  }
438
466
  async enableWaterFlowAnimation() {
467
+ if (this.forceSameTabNavigation) {
468
+ await chrome.debugger.sendCommand(
469
+ { tabId: this.tabIdOfDebuggerAttached },
470
+ "Runtime.evaluate",
471
+ {
472
+ expression: limitOpenNewTabScript
473
+ }
474
+ );
475
+ }
439
476
  const script = await injectWaterFlowAnimation();
440
477
  await chrome.debugger.sendCommand(
441
478
  { tabId: this.tabIdOfDebuggerAttached },
@@ -622,7 +659,9 @@ var ChromeExtensionProxyPage = class {
622
659
  commands: ["selectAll"]
623
660
  });
624
661
  await sleep(100);
625
- await this.keyboard.press("Backspace");
662
+ await this.keyboard.press({
663
+ key: "Backspace"
664
+ });
626
665
  }
627
666
  async destroy() {
628
667
  this.activeTabId = null;
@@ -651,7 +690,7 @@ var BridgeClient = class {
651
690
  this.socket = (0, import_socket.io)(this.endpoint, {
652
691
  reconnection: false,
653
692
  query: {
654
- version: "0.11.1"
693
+ version: "0.11.2"
655
694
  }
656
695
  });
657
696
  const timeout = setTimeout(() => {
@@ -710,8 +749,8 @@ ${(e == null ? void 0 : e.stack) || ""}`;
710
749
  var ChromeExtensionPageBrowserSide = class extends ChromeExtensionProxyPage {
711
750
  constructor(onDisconnect = () => {
712
751
  }, onLogMessage = () => {
713
- }, trackingActiveTab = false) {
714
- super(trackingActiveTab);
752
+ }, forceSameTabNavigation = true) {
753
+ super(forceSameTabNavigation);
715
754
  this.onDisconnect = onDisconnect;
716
755
  this.onLogMessage = onLogMessage;
717
756
  this.bridgeClient = null;
@@ -773,7 +812,7 @@ var ChromeExtensionPageBrowserSide = class extends ChromeExtensionProxyPage {
773
812
  );
774
813
  await this.bridgeClient.connect();
775
814
  this.onLogMessage(
776
- `Bridge connected, cli-side version v${this.bridgeClient.serverVersion}, browser-side version v${"0.11.1"}`,
815
+ `Bridge connected, cli-side version v${this.bridgeClient.serverVersion}, browser-side version v${"0.11.2"}`,
777
816
  "log"
778
817
  );
779
818
  }
@@ -781,18 +820,18 @@ var ChromeExtensionPageBrowserSide = class extends ChromeExtensionProxyPage {
781
820
  return await this.setupBridgeClient();
782
821
  }
783
822
  async connectNewTabWithUrl(url, options = {
784
- trackingActiveTab: true
823
+ forceSameTabNavigation: true
785
824
  }) {
786
825
  const tab = await chrome.tabs.create({ url });
787
826
  const tabId = tab.id;
788
827
  (0, import_node_assert4.default)(tabId, "failed to get tabId after creating a new tab");
789
828
  this.onLogMessage(`Creating new tab: ${url}`, "log");
790
- if (options == null ? void 0 : options.trackingActiveTab) {
791
- this.trackingActiveTab = true;
829
+ if (options == null ? void 0 : options.forceSameTabNavigation) {
830
+ this.forceSameTabNavigation = true;
792
831
  }
793
832
  }
794
833
  async connectCurrentTab(options = {
795
- trackingActiveTab: true
834
+ forceSameTabNavigation: true
796
835
  }) {
797
836
  var _a, _b;
798
837
  const tabs = await chrome.tabs.query({ active: true, currentWindow: true });
@@ -800,8 +839,8 @@ var ChromeExtensionPageBrowserSide = class extends ChromeExtensionProxyPage {
800
839
  const tabId = (_a = tabs[0]) == null ? void 0 : _a.id;
801
840
  (0, import_node_assert4.default)(tabId, "failed to get tabId");
802
841
  this.onLogMessage(`Connected to current tab: ${(_b = tabs[0]) == null ? void 0 : _b.url}`, "log");
803
- if (options == null ? void 0 : options.trackingActiveTab) {
804
- this.trackingActiveTab = true;
842
+ if (options == null ? void 0 : options.forceSameTabNavigation) {
843
+ this.forceSameTabNavigation = true;
805
844
  }
806
845
  }
807
846
  async destroy() {