@midscene/web 0.10.1 → 0.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/dist/es/appium.js +41 -5
  2. package/dist/es/bridge-mode-browser.js +43 -396
  3. package/dist/es/bridge-mode.js +44 -15
  4. package/dist/es/chrome-extension.js +59 -398
  5. package/dist/es/index.js +96 -9
  6. package/dist/es/midscene-playground.js +26 -5
  7. package/dist/es/playground.js +26 -5
  8. package/dist/es/playwright.js +62 -8
  9. package/dist/es/puppeteer.js +90 -13
  10. package/dist/lib/appium.js +41 -5
  11. package/dist/lib/bridge-mode-browser.js +43 -396
  12. package/dist/lib/bridge-mode.js +44 -15
  13. package/dist/lib/chrome-extension.js +59 -398
  14. package/dist/lib/index.js +96 -9
  15. package/dist/lib/midscene-playground.js +26 -5
  16. package/dist/lib/playground.js +26 -5
  17. package/dist/lib/playwright.js +62 -8
  18. package/dist/lib/puppeteer.js +90 -13
  19. package/dist/types/appium.d.ts +3 -3
  20. package/dist/types/bridge-mode-browser.d.ts +3 -3
  21. package/dist/types/bridge-mode.d.ts +6 -5
  22. package/dist/types/{browser-fd2e9b68.d.ts → browser-bc6c204c.d.ts} +1 -1
  23. package/dist/types/chrome-extension.d.ts +4 -4
  24. package/dist/types/debug.d.ts +2 -2
  25. package/dist/types/index.d.ts +4 -4
  26. package/dist/types/{page-fc3be0ec.d.ts → page-cfd75cdf.d.ts} +34 -4
  27. package/dist/types/playground.d.ts +4 -4
  28. package/dist/types/playwright.d.ts +4 -4
  29. package/dist/types/puppeteer.d.ts +8 -7
  30. package/dist/types/{tasks-f2973dd7.d.ts → tasks-681afd8a.d.ts} +4 -3
  31. package/dist/types/{utils-ccbb6ff6.d.ts → utils-1c43a008.d.ts} +1 -1
  32. package/dist/types/utils.d.ts +2 -2
  33. package/dist/types/yaml.d.ts +4 -4
  34. package/package.json +5 -3
package/dist/lib/index.js CHANGED
@@ -564,7 +564,10 @@ var PageTaskExecutor = class {
564
564
  locate: plan2.locate,
565
565
  executor: async (param, taskContext) => {
566
566
  const { task } = taskContext;
567
- (0, import_node_assert.default)((param == null ? void 0 : param.prompt) || (param == null ? void 0 : param.id), "No prompt or id to locate");
567
+ (0, import_node_assert.default)(
568
+ (param == null ? void 0 : param.prompt) || (param == null ? void 0 : param.id) || (param == null ? void 0 : param.position),
569
+ "No prompt or id or position to locate"
570
+ );
568
571
  let insightDump;
569
572
  const dumpCollector = (dump2) => {
570
573
  insightDump = dump2;
@@ -719,6 +722,22 @@ var PageTaskExecutor = class {
719
722
  }
720
723
  };
721
724
  tasks.push(taskActionTap);
725
+ } else if (plan2.type === "Drag") {
726
+ const taskActionDrag = {
727
+ type: "Action",
728
+ subType: "Drag",
729
+ param: plan2.param,
730
+ thought: plan2.thought,
731
+ locate: plan2.locate,
732
+ executor: async (taskParam) => {
733
+ (0, import_node_assert.default)(
734
+ (taskParam == null ? void 0 : taskParam.start_box) && (taskParam == null ? void 0 : taskParam.end_box),
735
+ "No start_box or end_box to drag"
736
+ );
737
+ await this.page.mouse.drag(taskParam.start_box, taskParam.end_box);
738
+ }
739
+ };
740
+ tasks.push(taskActionDrag);
722
741
  } else if (plan2.type === "Hover") {
723
742
  const taskActionHover = {
724
743
  type: "Action",
@@ -753,9 +772,9 @@ var PageTaskExecutor = class {
753
772
  } else if (scrollToEventName === "untilLeft") {
754
773
  await this.page.scrollUntilLeft(startingPoint);
755
774
  } else if (scrollToEventName === "once" || !scrollToEventName) {
756
- if (taskParam.direction === "down" || !taskParam.direction) {
775
+ if ((taskParam == null ? void 0 : taskParam.direction) === "down" || !taskParam || !taskParam.direction) {
757
776
  await this.page.scrollDown(
758
- taskParam.distance || void 0,
777
+ (taskParam == null ? void 0 : taskParam.distance) || void 0,
759
778
  startingPoint
760
779
  );
761
780
  } else if (taskParam.direction === "up") {
@@ -950,6 +969,7 @@ var PageTaskExecutor = class {
950
969
  userPrompt
951
970
  },
952
971
  executor: async (param, executorContext) => {
972
+ var _a;
953
973
  const shotTime = Date.now();
954
974
  const pageContext = await this.insight.contextRetrieverFn("locate");
955
975
  const recordItem = {
@@ -986,7 +1006,7 @@ var PageTaskExecutor = class {
986
1006
  return {
987
1007
  output: {
988
1008
  actions,
989
- thought: actions[0].thought,
1009
+ thought: (_a = actions[0]) == null ? void 0 : _a.thought,
990
1010
  actionType: actions[0].type,
991
1011
  taskWillBeAccomplished: false,
992
1012
  furtherPlan: {
@@ -1819,15 +1839,49 @@ var Page = class {
1819
1839
  );
1820
1840
  }
1821
1841
  },
1822
- move: async (x, y) => this.underlyingPage.mouse.move(x, y)
1842
+ move: async (x, y) => this.underlyingPage.mouse.move(x, y),
1843
+ drag: async (from, to) => {
1844
+ if (this.pageType === "puppeteer") {
1845
+ await this.underlyingPage.mouse.drag(
1846
+ {
1847
+ x: from.x,
1848
+ y: from.y
1849
+ },
1850
+ {
1851
+ x: to.x,
1852
+ y: to.y
1853
+ }
1854
+ );
1855
+ } else if (this.pageType === "playwright") {
1856
+ await this.underlyingPage.mouse.move(
1857
+ from.x,
1858
+ from.y
1859
+ );
1860
+ await this.underlyingPage.mouse.down();
1861
+ await this.underlyingPage.mouse.move(to.x, to.y);
1862
+ await this.underlyingPage.mouse.up();
1863
+ }
1864
+ }
1823
1865
  };
1824
1866
  }
1825
1867
  get keyboard() {
1826
1868
  return {
1827
1869
  type: async (text) => this.underlyingPage.keyboard.type(text, { delay: 80 }),
1828
- press: async (key) => this.underlyingPage.keyboard.press(key),
1829
- down: async (key) => this.underlyingPage.keyboard.down(key),
1830
- up: async (key) => this.underlyingPage.keyboard.up(key)
1870
+ press: async (key) => {
1871
+ const keys = Array.isArray(key) ? key : [key];
1872
+ for (const key2 of keys) {
1873
+ await this.underlyingPage.keyboard.down(key2);
1874
+ }
1875
+ for (const key2 of [...keys].reverse()) {
1876
+ await this.underlyingPage.keyboard.up(key2);
1877
+ }
1878
+ },
1879
+ down: async (key) => {
1880
+ this.underlyingPage.keyboard.down(key);
1881
+ },
1882
+ up: async (key) => {
1883
+ this.underlyingPage.keyboard.up(key);
1884
+ }
1831
1885
  };
1832
1886
  }
1833
1887
  async clearInput(element) {
@@ -1946,6 +2000,23 @@ var PuppeteerAgent = class extends PageAgent {
1946
2000
  constructor(page, opts) {
1947
2001
  const webPage = new WebPage2(page);
1948
2002
  super(webPage, opts);
2003
+ if (opts == null ? void 0 : opts.trackingActiveTab) {
2004
+ const browser = this.page.underlyingPage.browser();
2005
+ browser.on("targetcreated", async (target) => {
2006
+ if (target.type() === "page") {
2007
+ const targetPage = await target.page();
2008
+ if (!targetPage) {
2009
+ console.warn(
2010
+ "got a targetPage event, but the page is not ready yet, skip"
2011
+ );
2012
+ return;
2013
+ }
2014
+ const midscenePage = new WebPage2(targetPage);
2015
+ this.page = midscenePage;
2016
+ this.taskExecutor.page = midscenePage;
2017
+ }
2018
+ });
2019
+ }
1949
2020
  }
1950
2021
  };
1951
2022
 
@@ -2179,7 +2250,8 @@ var Page2 = class {
2179
2250
  return {
2180
2251
  click: (x, y, options) => this.mouseClick(x, y, (options == null ? void 0 : options.button) || "left"),
2181
2252
  wheel: (deltaX, deltaY) => this.mouseWheel(deltaX, deltaY),
2182
- move: (x, y) => this.mouseMove(x, y)
2253
+ move: (x, y) => this.mouseMove(x, y),
2254
+ drag: (from, to) => this.mouseDrag(from, to)
2183
2255
  };
2184
2256
  }
2185
2257
  // Object that includes keyboard and mouse operations
@@ -2326,6 +2398,21 @@ var Page2 = class {
2326
2398
  }
2327
2399
  ]);
2328
2400
  }
2401
+ async mouseDrag(from, to) {
2402
+ await this.browser.performActions([
2403
+ {
2404
+ type: "pointer",
2405
+ id: "mouse",
2406
+ parameters: { pointerType: "mouse" },
2407
+ actions: [
2408
+ { type: "pointerMove", duration: 0, x: from.x, y: from.y },
2409
+ { type: "pointerDown", button: 0 },
2410
+ { type: "pointerMove", duration: 500, x: to.x, y: to.y },
2411
+ { type: "pointerUp", button: 0 }
2412
+ ]
2413
+ }
2414
+ ]);
2415
+ }
2329
2416
  async mouseWheel(deltaX, deltaY, duration = 1e3) {
2330
2417
  const n = 4;
2331
2418
  const windowSize = await this.browser.getWindowSize();
package/dist/lib/midscene-playground.js CHANGED
@@ -1012,7 +1012,10 @@ var PageTaskExecutor = class {
1012
1012
  locate: plan2.locate,
1013
1013
  executor: async (param, taskContext) => {
1014
1014
  const { task } = taskContext;
1015
- (0, import_node_assert2.default)((param == null ? void 0 : param.prompt) || (param == null ? void 0 : param.id), "No prompt or id to locate");
1015
+ (0, import_node_assert2.default)(
1016
+ (param == null ? void 0 : param.prompt) || (param == null ? void 0 : param.id) || (param == null ? void 0 : param.position),
1017
+ "No prompt or id or position to locate"
1018
+ );
1016
1019
  let insightDump;
1017
1020
  const dumpCollector = (dump) => {
1018
1021
  insightDump = dump;
@@ -1167,6 +1170,22 @@ var PageTaskExecutor = class {
1167
1170
  }
1168
1171
  };
1169
1172
  tasks.push(taskActionTap);
1173
+ } else if (plan2.type === "Drag") {
1174
+ const taskActionDrag = {
1175
+ type: "Action",
1176
+ subType: "Drag",
1177
+ param: plan2.param,
1178
+ thought: plan2.thought,
1179
+ locate: plan2.locate,
1180
+ executor: async (taskParam) => {
1181
+ (0, import_node_assert2.default)(
1182
+ (taskParam == null ? void 0 : taskParam.start_box) && (taskParam == null ? void 0 : taskParam.end_box),
1183
+ "No start_box or end_box to drag"
1184
+ );
1185
+ await this.page.mouse.drag(taskParam.start_box, taskParam.end_box);
1186
+ }
1187
+ };
1188
+ tasks.push(taskActionDrag);
1170
1189
  } else if (plan2.type === "Hover") {
1171
1190
  const taskActionHover = {
1172
1191
  type: "Action",
@@ -1201,9 +1220,9 @@ var PageTaskExecutor = class {
1201
1220
  } else if (scrollToEventName === "untilLeft") {
1202
1221
  await this.page.scrollUntilLeft(startingPoint);
1203
1222
  } else if (scrollToEventName === "once" || !scrollToEventName) {
1204
- if (taskParam.direction === "down" || !taskParam.direction) {
1223
+ if ((taskParam == null ? void 0 : taskParam.direction) === "down" || !taskParam || !taskParam.direction) {
1205
1224
  await this.page.scrollDown(
1206
- taskParam.distance || void 0,
1225
+ (taskParam == null ? void 0 : taskParam.distance) || void 0,
1207
1226
  startingPoint
1208
1227
  );
1209
1228
  } else if (taskParam.direction === "up") {
@@ -1398,6 +1417,7 @@ var PageTaskExecutor = class {
1398
1417
  userPrompt
1399
1418
  },
1400
1419
  executor: async (param, executorContext) => {
1420
+ var _a;
1401
1421
  const shotTime = Date.now();
1402
1422
  const pageContext = await this.insight.contextRetrieverFn("locate");
1403
1423
  const recordItem = {
@@ -1434,7 +1454,7 @@ var PageTaskExecutor = class {
1434
1454
  return {
1435
1455
  output: {
1436
1456
  actions,
1437
- thought: actions[0].thought,
1457
+ thought: (_a = actions[0]) == null ? void 0 : _a.thought,
1438
1458
  actionType: actions[0].type,
1439
1459
  taskWillBeAccomplished: false,
1440
1460
  furtherPlan: {
@@ -1946,7 +1966,8 @@ var StaticPage = class {
1946
1966
  this.mouse = {
1947
1967
  click: ThrowNotImplemented.bind(null, "mouse.click"),
1948
1968
  wheel: ThrowNotImplemented.bind(null, "mouse.wheel"),
1949
- move: ThrowNotImplemented.bind(null, "mouse.move")
1969
+ move: ThrowNotImplemented.bind(null, "mouse.move"),
1970
+ drag: ThrowNotImplemented.bind(null, "mouse.drag")
1950
1971
  };
1951
1972
  this.keyboard = {
1952
1973
  type: ThrowNotImplemented.bind(null, "keyboard.type"),
package/dist/lib/playground.js CHANGED
@@ -675,7 +675,10 @@ var PageTaskExecutor = class {
675
675
  locate: plan2.locate,
676
676
  executor: async (param, taskContext) => {
677
677
  const { task } = taskContext;
678
- (0, import_node_assert2.default)((param == null ? void 0 : param.prompt) || (param == null ? void 0 : param.id), "No prompt or id to locate");
678
+ (0, import_node_assert2.default)(
679
+ (param == null ? void 0 : param.prompt) || (param == null ? void 0 : param.id) || (param == null ? void 0 : param.position),
680
+ "No prompt or id or position to locate"
681
+ );
679
682
  let insightDump;
680
683
  const dumpCollector = (dump) => {
681
684
  insightDump = dump;
@@ -830,6 +833,22 @@ var PageTaskExecutor = class {
830
833
  }
831
834
  };
832
835
  tasks.push(taskActionTap);
836
+ } else if (plan2.type === "Drag") {
837
+ const taskActionDrag = {
838
+ type: "Action",
839
+ subType: "Drag",
840
+ param: plan2.param,
841
+ thought: plan2.thought,
842
+ locate: plan2.locate,
843
+ executor: async (taskParam) => {
844
+ (0, import_node_assert2.default)(
845
+ (taskParam == null ? void 0 : taskParam.start_box) && (taskParam == null ? void 0 : taskParam.end_box),
846
+ "No start_box or end_box to drag"
847
+ );
848
+ await this.page.mouse.drag(taskParam.start_box, taskParam.end_box);
849
+ }
850
+ };
851
+ tasks.push(taskActionDrag);
833
852
  } else if (plan2.type === "Hover") {
834
853
  const taskActionHover = {
835
854
  type: "Action",
@@ -864,9 +883,9 @@ var PageTaskExecutor = class {
864
883
  } else if (scrollToEventName === "untilLeft") {
865
884
  await this.page.scrollUntilLeft(startingPoint);
866
885
  } else if (scrollToEventName === "once" || !scrollToEventName) {
867
- if (taskParam.direction === "down" || !taskParam.direction) {
886
+ if ((taskParam == null ? void 0 : taskParam.direction) === "down" || !taskParam || !taskParam.direction) {
868
887
  await this.page.scrollDown(
869
- taskParam.distance || void 0,
888
+ (taskParam == null ? void 0 : taskParam.distance) || void 0,
870
889
  startingPoint
871
890
  );
872
891
  } else if (taskParam.direction === "up") {
@@ -1061,6 +1080,7 @@ var PageTaskExecutor = class {
1061
1080
  userPrompt
1062
1081
  },
1063
1082
  executor: async (param, executorContext) => {
1083
+ var _a;
1064
1084
  const shotTime = Date.now();
1065
1085
  const pageContext = await this.insight.contextRetrieverFn("locate");
1066
1086
  const recordItem = {
@@ -1097,7 +1117,7 @@ var PageTaskExecutor = class {
1097
1117
  return {
1098
1118
  output: {
1099
1119
  actions,
1100
- thought: actions[0].thought,
1120
+ thought: (_a = actions[0]) == null ? void 0 : _a.thought,
1101
1121
  actionType: actions[0].type,
1102
1122
  taskWillBeAccomplished: false,
1103
1123
  furtherPlan: {
@@ -1609,7 +1629,8 @@ var StaticPage = class {
1609
1629
  this.mouse = {
1610
1630
  click: ThrowNotImplemented.bind(null, "mouse.click"),
1611
1631
  wheel: ThrowNotImplemented.bind(null, "mouse.wheel"),
1612
- move: ThrowNotImplemented.bind(null, "mouse.move")
1632
+ move: ThrowNotImplemented.bind(null, "mouse.move"),
1633
+ drag: ThrowNotImplemented.bind(null, "mouse.drag")
1613
1634
  };
1614
1635
  this.keyboard = {
1615
1636
  type: ThrowNotImplemented.bind(null, "keyboard.type"),
package/dist/lib/playwright.js CHANGED
@@ -558,7 +558,10 @@ var PageTaskExecutor = class {
558
558
  locate: plan2.locate,
559
559
  executor: async (param, taskContext) => {
560
560
  const { task } = taskContext;
561
- (0, import_node_assert.default)((param == null ? void 0 : param.prompt) || (param == null ? void 0 : param.id), "No prompt or id to locate");
561
+ (0, import_node_assert.default)(
562
+ (param == null ? void 0 : param.prompt) || (param == null ? void 0 : param.id) || (param == null ? void 0 : param.position),
563
+ "No prompt or id or position to locate"
564
+ );
562
565
  let insightDump;
563
566
  const dumpCollector = (dump) => {
564
567
  insightDump = dump;
@@ -713,6 +716,22 @@ var PageTaskExecutor = class {
713
716
  }
714
717
  };
715
718
  tasks.push(taskActionTap);
719
+ } else if (plan2.type === "Drag") {
720
+ const taskActionDrag = {
721
+ type: "Action",
722
+ subType: "Drag",
723
+ param: plan2.param,
724
+ thought: plan2.thought,
725
+ locate: plan2.locate,
726
+ executor: async (taskParam) => {
727
+ (0, import_node_assert.default)(
728
+ (taskParam == null ? void 0 : taskParam.start_box) && (taskParam == null ? void 0 : taskParam.end_box),
729
+ "No start_box or end_box to drag"
730
+ );
731
+ await this.page.mouse.drag(taskParam.start_box, taskParam.end_box);
732
+ }
733
+ };
734
+ tasks.push(taskActionDrag);
716
735
  } else if (plan2.type === "Hover") {
717
736
  const taskActionHover = {
718
737
  type: "Action",
@@ -747,9 +766,9 @@ var PageTaskExecutor = class {
747
766
  } else if (scrollToEventName === "untilLeft") {
748
767
  await this.page.scrollUntilLeft(startingPoint);
749
768
  } else if (scrollToEventName === "once" || !scrollToEventName) {
750
- if (taskParam.direction === "down" || !taskParam.direction) {
769
+ if ((taskParam == null ? void 0 : taskParam.direction) === "down" || !taskParam || !taskParam.direction) {
751
770
  await this.page.scrollDown(
752
- taskParam.distance || void 0,
771
+ (taskParam == null ? void 0 : taskParam.distance) || void 0,
753
772
  startingPoint
754
773
  );
755
774
  } else if (taskParam.direction === "up") {
@@ -944,6 +963,7 @@ var PageTaskExecutor = class {
944
963
  userPrompt
945
964
  },
946
965
  executor: async (param, executorContext) => {
966
+ var _a;
947
967
  const shotTime = Date.now();
948
968
  const pageContext = await this.insight.contextRetrieverFn("locate");
949
969
  const recordItem = {
@@ -980,7 +1000,7 @@ var PageTaskExecutor = class {
980
1000
  return {
981
1001
  output: {
982
1002
  actions,
983
- thought: actions[0].thought,
1003
+ thought: (_a = actions[0]) == null ? void 0 : _a.thought,
984
1004
  actionType: actions[0].type,
985
1005
  taskWillBeAccomplished: false,
986
1006
  furtherPlan: {
@@ -1813,15 +1833,49 @@ var Page = class {
1813
1833
  );
1814
1834
  }
1815
1835
  },
1816
- move: async (x, y) => this.underlyingPage.mouse.move(x, y)
1836
+ move: async (x, y) => this.underlyingPage.mouse.move(x, y),
1837
+ drag: async (from, to) => {
1838
+ if (this.pageType === "puppeteer") {
1839
+ await this.underlyingPage.mouse.drag(
1840
+ {
1841
+ x: from.x,
1842
+ y: from.y
1843
+ },
1844
+ {
1845
+ x: to.x,
1846
+ y: to.y
1847
+ }
1848
+ );
1849
+ } else if (this.pageType === "playwright") {
1850
+ await this.underlyingPage.mouse.move(
1851
+ from.x,
1852
+ from.y
1853
+ );
1854
+ await this.underlyingPage.mouse.down();
1855
+ await this.underlyingPage.mouse.move(to.x, to.y);
1856
+ await this.underlyingPage.mouse.up();
1857
+ }
1858
+ }
1817
1859
  };
1818
1860
  }
1819
1861
  get keyboard() {
1820
1862
  return {
1821
1863
  type: async (text) => this.underlyingPage.keyboard.type(text, { delay: 80 }),
1822
- press: async (key) => this.underlyingPage.keyboard.press(key),
1823
- down: async (key) => this.underlyingPage.keyboard.down(key),
1824
- up: async (key) => this.underlyingPage.keyboard.up(key)
1864
+ press: async (key) => {
1865
+ const keys = Array.isArray(key) ? key : [key];
1866
+ for (const key2 of keys) {
1867
+ await this.underlyingPage.keyboard.down(key2);
1868
+ }
1869
+ for (const key2 of [...keys].reverse()) {
1870
+ await this.underlyingPage.keyboard.up(key2);
1871
+ }
1872
+ },
1873
+ down: async (key) => {
1874
+ this.underlyingPage.keyboard.down(key);
1875
+ },
1876
+ up: async (key) => {
1877
+ this.underlyingPage.keyboard.up(key);
1878
+ }
1825
1879
  };
1826
1880
  }
1827
1881
  async clearInput(element) {
package/dist/lib/puppeteer.js CHANGED
@@ -555,7 +555,10 @@ var PageTaskExecutor = class {
555
555
  locate: plan2.locate,
556
556
  executor: async (param, taskContext) => {
557
557
  const { task } = taskContext;
558
- (0, import_node_assert.default)((param == null ? void 0 : param.prompt) || (param == null ? void 0 : param.id), "No prompt or id to locate");
558
+ (0, import_node_assert.default)(
559
+ (param == null ? void 0 : param.prompt) || (param == null ? void 0 : param.id) || (param == null ? void 0 : param.position),
560
+ "No prompt or id or position to locate"
561
+ );
559
562
  let insightDump;
560
563
  const dumpCollector = (dump) => {
561
564
  insightDump = dump;
@@ -710,6 +713,22 @@ var PageTaskExecutor = class {
710
713
  }
711
714
  };
712
715
  tasks.push(taskActionTap);
716
+ } else if (plan2.type === "Drag") {
717
+ const taskActionDrag = {
718
+ type: "Action",
719
+ subType: "Drag",
720
+ param: plan2.param,
721
+ thought: plan2.thought,
722
+ locate: plan2.locate,
723
+ executor: async (taskParam) => {
724
+ (0, import_node_assert.default)(
725
+ (taskParam == null ? void 0 : taskParam.start_box) && (taskParam == null ? void 0 : taskParam.end_box),
726
+ "No start_box or end_box to drag"
727
+ );
728
+ await this.page.mouse.drag(taskParam.start_box, taskParam.end_box);
729
+ }
730
+ };
731
+ tasks.push(taskActionDrag);
713
732
  } else if (plan2.type === "Hover") {
714
733
  const taskActionHover = {
715
734
  type: "Action",
@@ -744,9 +763,9 @@ var PageTaskExecutor = class {
744
763
  } else if (scrollToEventName === "untilLeft") {
745
764
  await this.page.scrollUntilLeft(startingPoint);
746
765
  } else if (scrollToEventName === "once" || !scrollToEventName) {
747
- if (taskParam.direction === "down" || !taskParam.direction) {
766
+ if ((taskParam == null ? void 0 : taskParam.direction) === "down" || !taskParam || !taskParam.direction) {
748
767
  await this.page.scrollDown(
749
- taskParam.distance || void 0,
768
+ (taskParam == null ? void 0 : taskParam.distance) || void 0,
750
769
  startingPoint
751
770
  );
752
771
  } else if (taskParam.direction === "up") {
@@ -941,6 +960,7 @@ var PageTaskExecutor = class {
941
960
  userPrompt
942
961
  },
943
962
  executor: async (param, executorContext) => {
963
+ var _a;
944
964
  const shotTime = Date.now();
945
965
  const pageContext = await this.insight.contextRetrieverFn("locate");
946
966
  const recordItem = {
@@ -977,7 +997,7 @@ var PageTaskExecutor = class {
977
997
  return {
978
998
  output: {
979
999
  actions,
980
- thought: actions[0].thought,
1000
+ thought: (_a = actions[0]) == null ? void 0 : _a.thought,
981
1001
  actionType: actions[0].type,
982
1002
  taskWillBeAccomplished: false,
983
1003
  furtherPlan: {
@@ -1658,15 +1678,49 @@ var Page = class {
1658
1678
  );
1659
1679
  }
1660
1680
  },
1661
- move: async (x, y) => this.underlyingPage.mouse.move(x, y)
1681
+ move: async (x, y) => this.underlyingPage.mouse.move(x, y),
1682
+ drag: async (from, to) => {
1683
+ if (this.pageType === "puppeteer") {
1684
+ await this.underlyingPage.mouse.drag(
1685
+ {
1686
+ x: from.x,
1687
+ y: from.y
1688
+ },
1689
+ {
1690
+ x: to.x,
1691
+ y: to.y
1692
+ }
1693
+ );
1694
+ } else if (this.pageType === "playwright") {
1695
+ await this.underlyingPage.mouse.move(
1696
+ from.x,
1697
+ from.y
1698
+ );
1699
+ await this.underlyingPage.mouse.down();
1700
+ await this.underlyingPage.mouse.move(to.x, to.y);
1701
+ await this.underlyingPage.mouse.up();
1702
+ }
1703
+ }
1662
1704
  };
1663
1705
  }
1664
1706
  get keyboard() {
1665
1707
  return {
1666
1708
  type: async (text) => this.underlyingPage.keyboard.type(text, { delay: 80 }),
1667
- press: async (key) => this.underlyingPage.keyboard.press(key),
1668
- down: async (key) => this.underlyingPage.keyboard.down(key),
1669
- up: async (key) => this.underlyingPage.keyboard.up(key)
1709
+ press: async (key) => {
1710
+ const keys = Array.isArray(key) ? key : [key];
1711
+ for (const key2 of keys) {
1712
+ await this.underlyingPage.keyboard.down(key2);
1713
+ }
1714
+ for (const key2 of [...keys].reverse()) {
1715
+ await this.underlyingPage.keyboard.up(key2);
1716
+ }
1717
+ },
1718
+ down: async (key) => {
1719
+ this.underlyingPage.keyboard.down(key);
1720
+ },
1721
+ up: async (key) => {
1722
+ this.underlyingPage.keyboard.up(key);
1723
+ }
1670
1724
  };
1671
1725
  }
1672
1726
  async clearInput(element) {
@@ -1772,7 +1826,7 @@ var defaultViewportWidth = 1440;
1772
1826
  var defaultViewportHeight = 900;
1773
1827
  var defaultViewportScale = process.platform === "darwin" ? 2 : 1;
1774
1828
  var defaultWaitForNetworkIdleTimeout = 10 * 1e3;
1775
- async function puppeteerAgentForTarget(target, preference) {
1829
+ async function launchPuppeteerPage(target, preference) {
1776
1830
  var _a, _b, _c;
1777
1831
  (0, import_node_assert3.default)(target.url, "url is required");
1778
1832
  const freeFn = [];
@@ -1827,10 +1881,12 @@ async function puppeteerAgentForTarget(target, preference) {
1827
1881
  const isWindows = process.platform === "win32";
1828
1882
  const browser = await puppeteer.launch({
1829
1883
  headless: !headed,
1884
+ defaultViewport: viewportConfig,
1830
1885
  args: [
1831
1886
  ...isWindows ? [] : ["--no-sandbox", "--disable-setuid-sandbox"],
1832
1887
  "--disable-features=PasswordLeakDetection",
1833
1888
  "--disable-save-password-bubble",
1889
+ `--user-agent="${ua}"`,
1834
1890
  preferMaximizedWindow ? "--start-maximized" : `--window-size=${width},${height}`
1835
1891
  ]
1836
1892
  });
@@ -1850,15 +1906,13 @@ async function puppeteerAgentForTarget(target, preference) {
1850
1906
  });
1851
1907
  const pages = await browser.pages();
1852
1908
  const page = pages[0];
1853
- await page.setUserAgent(ua);
1854
- await page.setViewport(viewportConfig);
1855
1909
  if (target.cookie) {
1856
1910
  const cookieFileContent = (0, import_node_fs3.readFileSync)(target.cookie, "utf-8");
1857
1911
  await page.setCookie(...JSON.parse(cookieFileContent));
1858
1912
  }
1859
- await page.goto(target.url);
1860
1913
  const waitForNetworkIdleTimeout = typeof ((_a = target.waitForNetworkIdle) == null ? void 0 : _a.timeout) === "number" ? target.waitForNetworkIdle.timeout : defaultWaitForNetworkIdleTimeout;
1861
1914
  try {
1915
+ await page.goto(target.url);
1862
1916
  if (waitForNetworkIdleTimeout > 0) {
1863
1917
  await page.waitForNetworkIdle({
1864
1918
  timeout: waitForNetworkIdleTimeout
@@ -1874,9 +1928,15 @@ async function puppeteerAgentForTarget(target, preference) {
1874
1928
  const newMessage = `failed to wait for network idle after ${waitForNetworkIdleTimeout}ms, but the script will continue.`;
1875
1929
  console.warn(newMessage);
1876
1930
  }
1931
+ return { page, freeFn };
1932
+ }
1933
+ async function puppeteerAgentForTarget(target, preference) {
1934
+ const { page, freeFn } = await launchPuppeteerPage(target, preference);
1877
1935
  const agent = new PuppeteerAgent(page, {
1878
1936
  autoPrintReportMsg: false,
1879
- testId: preference == null ? void 0 : preference.testId
1937
+ testId: preference == null ? void 0 : preference.testId,
1938
+ trackingActiveTab: typeof target.trackingActiveTab !== "undefined" ? target.trackingActiveTab : true
1939
+ // true for default in yaml script
1880
1940
  });
1881
1941
  freeFn.push({
1882
1942
  name: "midscene_puppeteer_agent",
@@ -1890,6 +1950,23 @@ var PuppeteerAgent = class extends PageAgent {
1890
1950
  constructor(page, opts) {
1891
1951
  const webPage = new WebPage(page);
1892
1952
  super(webPage, opts);
1953
+ if (opts == null ? void 0 : opts.trackingActiveTab) {
1954
+ const browser = this.page.underlyingPage.browser();
1955
+ browser.on("targetcreated", async (target) => {
1956
+ if (target.type() === "page") {
1957
+ const targetPage = await target.page();
1958
+ if (!targetPage) {
1959
+ console.warn(
1960
+ "got a targetPage event, but the page is not ready yet, skip"
1961
+ );
1962
+ return;
1963
+ }
1964
+ const midscenePage = new WebPage(targetPage);
1965
+ this.page = midscenePage;
1966
+ this.taskExecutor.page = midscenePage;
1967
+ }
1968
+ });
1969
+ }
1893
1970
  }
1894
1971
  };
1895
1972
  // Annotate the CommonJS export names for ESM import in node:
package/dist/types/appium.d.ts CHANGED
@@ -1,6 +1,6 @@
1
- export { P as AppiumAgent } from './tasks-f2973dd7.js';
2
- export { P as AppiumPage } from './page-fc3be0ec.js';
3
- import '@midscene/core/dist/lib/types/types-64c4d87b';
1
+ export { P as AppiumAgent } from './tasks-681afd8a.js';
2
+ export { P as AppiumPage } from './page-cfd75cdf.js';
3
+ import '@midscene/core/dist/lib/types/types-7fe32cfe';
4
4
  import '@midscene/core';
5
5
  import '@midscene/core/ai-model';
6
6
  import '@midscene/shared/fs';
package/dist/types/bridge-mode-browser.d.ts CHANGED
@@ -1,8 +1,8 @@
1
- export { C as ChromeExtensionPageBrowserSide } from './browser-fd2e9b68.js';
2
- import './page-fc3be0ec.js';
1
+ export { C as ChromeExtensionPageBrowserSide } from './browser-bc6c204c.js';
2
+ import './page-cfd75cdf.js';
3
3
  import 'playwright';
4
4
  import '@midscene/core';
5
5
  import 'puppeteer';
6
6
  import '@midscene/shared/constants';
7
- import '@midscene/core/dist/lib/types/types-64c4d87b';
7
+ import '@midscene/core/dist/lib/types/types-7fe32cfe';
8
8
  import 'webdriverio';
package/dist/types/bridge-mode.d.ts CHANGED
@@ -1,7 +1,8 @@
1
- import { P as PageAgent } from './tasks-f2973dd7.js';
2
- import { B as BridgeConnectTabOptions, C as ChromeExtensionPageBrowserSide } from './browser-fd2e9b68.js';
3
- import '@midscene/core/dist/lib/types/types-64c4d87b';
4
- import './page-fc3be0ec.js';
1
+ import { P as PageAgent, a as PageAgentOpt } from './tasks-681afd8a.js';
2
+ import { B as BridgeConnectTabOptions, C as ChromeExtensionPageBrowserSide } from './browser-bc6c204c.js';
3
+ export { overrideAIConfig } from '@midscene/core/env';
4
+ import '@midscene/core/dist/lib/types/types-7fe32cfe';
5
+ import './page-cfd75cdf.js';
5
6
  import 'playwright';
6
7
  import '@midscene/core';
7
8
  import 'puppeteer';
@@ -14,7 +15,7 @@ interface ChromeExtensionPageCliSide extends ChromeExtensionPageBrowserSide {
14
15
  showStatusMessage: (message: string) => Promise<void>;
15
16
  }
16
17
  declare class AgentOverChromeBridge extends PageAgent<ChromeExtensionPageCliSide> {
17
- constructor();
18
+ constructor(opts?: PageAgentOpt);
18
19
  connectNewTabWithUrl(url: string, options?: BridgeConnectTabOptions): Promise<void>;
19
20
  connectCurrentTab(options?: BridgeConnectTabOptions): Promise<void>;
20
21
  aiAction(prompt: string, options?: any): Promise<void>;