@midscene/web 0.8.6 → 0.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -831,26 +831,42 @@ var PageTaskExecutor = class {
831
831
  param: plan2.param,
832
832
  thought: plan2.thought,
833
833
  locate: plan2.locate,
834
- executor: async (taskParam) => {
835
- const scrollToEventName = taskParam.scrollType;
836
- switch (scrollToEventName) {
837
- case "scrollUntilTop":
838
- await this.page.scrollUntilTop();
839
- break;
840
- case "scrollUntilBottom":
841
- await this.page.scrollUntilBottom();
842
- break;
843
- case "scrollUpOneScreen":
844
- await this.page.scrollUpOneScreen();
845
- break;
846
- case "scrollDownOneScreen":
847
- await this.page.scrollDownOneScreen();
848
- break;
849
- default:
850
- console.error(
851
- "Unknown scroll event type:",
852
- scrollToEventName
834
+ executor: async (taskParam, { element }) => {
835
+ if (element) {
836
+ await this.page.mouse.move(
837
+ element.center[0],
838
+ element.center[1]
839
+ );
840
+ }
841
+ const scrollToEventName = taskParam == null ? void 0 : taskParam.scrollType;
842
+ if (scrollToEventName === "untilTop") {
843
+ await this.page.scrollUntilTop();
844
+ } else if (scrollToEventName === "untilBottom") {
845
+ await this.page.scrollUntilBottom();
846
+ } else if (scrollToEventName === "untilRight") {
847
+ await this.page.scrollUntilRight();
848
+ } else if (scrollToEventName === "untilLeft") {
849
+ await this.page.scrollUntilLeft();
850
+ } else if (scrollToEventName === "once") {
851
+ if (taskParam.direction === "down") {
852
+ await this.page.scrollDown(taskParam.distance || void 0);
853
+ } else if (taskParam.direction === "up") {
854
+ await this.page.scrollUp(taskParam.distance || void 0);
855
+ } else if (taskParam.direction === "left") {
856
+ await this.page.scrollLeft(taskParam.distance || void 0);
857
+ } else if (taskParam.direction === "right") {
858
+ await this.page.scrollRight(taskParam.distance || void 0);
859
+ } else {
860
+ throw new Error(
861
+ `Unknown scroll direction: ${taskParam.direction}`
853
862
  );
863
+ }
864
+ } else {
865
+ throw new Error(
866
+ `Unknown scroll event type: ${scrollToEventName}, taskParam: ${JSON.stringify(
867
+ taskParam
868
+ )}`
869
+ );
854
870
  }
855
871
  }
856
872
  };
@@ -879,6 +895,17 @@ var PageTaskExecutor = class {
879
895
  }
880
896
  };
881
897
  tasks.push(taskActionError);
898
+ } else if (plan2.type === "FalsyConditionStatement") {
899
+ const taskActionFalsyConditionStatement = {
900
+ type: "Action",
901
+ subType: "FalsyConditionStatement",
902
+ param: null,
903
+ thought: plan2.thought,
904
+ locate: plan2.locate,
905
+ executor: async () => {
906
+ }
907
+ };
908
+ tasks.push(taskActionFalsyConditionStatement);
882
909
  } else {
883
910
  throw new Error(`Unknown or unsupported task type: ${plan2.type}`);
884
911
  }
@@ -998,6 +1025,9 @@ var PageTaskExecutor = class {
998
1025
  const errorMsg = "Replanning too many times, please split the task into multiple steps";
999
1026
  return this.appendErrorPlan(taskExecutor, errorMsg);
1000
1027
  }
1028
+ if (replanCount > 0) {
1029
+ await (0, import_utils5.sleep)(300);
1030
+ }
1001
1031
  await taskExecutor.append(planningTask);
1002
1032
  const planResult = await taskExecutor.flush();
1003
1033
  if (taskExecutor.isInErrorState()) {
@@ -1367,11 +1397,23 @@ var StaticPage = class {
1367
1397
  async scrollUntilBottom() {
1368
1398
  return ThrowNotImplemented("scrollUntilBottom");
1369
1399
  }
1370
- async scrollUpOneScreen() {
1371
- return ThrowNotImplemented("scrollUpOneScreen");
1400
+ async scrollUntilLeft() {
1401
+ return ThrowNotImplemented("scrollUntilLeft");
1402
+ }
1403
+ async scrollUntilRight() {
1404
+ return ThrowNotImplemented("scrollUntilRight");
1405
+ }
1406
+ async scrollUp(distance) {
1407
+ return ThrowNotImplemented("scrollUp");
1408
+ }
1409
+ async scrollDown(distance) {
1410
+ return ThrowNotImplemented("scrollDown");
1411
+ }
1412
+ async scrollLeft(distance) {
1413
+ return ThrowNotImplemented("scrollLeft");
1372
1414
  }
1373
- async scrollDownOneScreen() {
1374
- return ThrowNotImplemented("scrollDownOneScreen");
1415
+ async scrollRight(distance) {
1416
+ return ThrowNotImplemented("scrollRight");
1375
1417
  }
1376
1418
  async clearInput() {
1377
1419
  return ThrowNotImplemented("clearInput");
@@ -721,26 +721,42 @@ var PageTaskExecutor = class {
721
721
  param: plan2.param,
722
722
  thought: plan2.thought,
723
723
  locate: plan2.locate,
724
- executor: async (taskParam) => {
725
- const scrollToEventName = taskParam.scrollType;
726
- switch (scrollToEventName) {
727
- case "scrollUntilTop":
728
- await this.page.scrollUntilTop();
729
- break;
730
- case "scrollUntilBottom":
731
- await this.page.scrollUntilBottom();
732
- break;
733
- case "scrollUpOneScreen":
734
- await this.page.scrollUpOneScreen();
735
- break;
736
- case "scrollDownOneScreen":
737
- await this.page.scrollDownOneScreen();
738
- break;
739
- default:
740
- console.error(
741
- "Unknown scroll event type:",
742
- scrollToEventName
724
+ executor: async (taskParam, { element }) => {
725
+ if (element) {
726
+ await this.page.mouse.move(
727
+ element.center[0],
728
+ element.center[1]
729
+ );
730
+ }
731
+ const scrollToEventName = taskParam == null ? void 0 : taskParam.scrollType;
732
+ if (scrollToEventName === "untilTop") {
733
+ await this.page.scrollUntilTop();
734
+ } else if (scrollToEventName === "untilBottom") {
735
+ await this.page.scrollUntilBottom();
736
+ } else if (scrollToEventName === "untilRight") {
737
+ await this.page.scrollUntilRight();
738
+ } else if (scrollToEventName === "untilLeft") {
739
+ await this.page.scrollUntilLeft();
740
+ } else if (scrollToEventName === "once") {
741
+ if (taskParam.direction === "down") {
742
+ await this.page.scrollDown(taskParam.distance || void 0);
743
+ } else if (taskParam.direction === "up") {
744
+ await this.page.scrollUp(taskParam.distance || void 0);
745
+ } else if (taskParam.direction === "left") {
746
+ await this.page.scrollLeft(taskParam.distance || void 0);
747
+ } else if (taskParam.direction === "right") {
748
+ await this.page.scrollRight(taskParam.distance || void 0);
749
+ } else {
750
+ throw new Error(
751
+ `Unknown scroll direction: ${taskParam.direction}`
743
752
  );
753
+ }
754
+ } else {
755
+ throw new Error(
756
+ `Unknown scroll event type: ${scrollToEventName}, taskParam: ${JSON.stringify(
757
+ taskParam
758
+ )}`
759
+ );
744
760
  }
745
761
  }
746
762
  };
@@ -769,6 +785,17 @@ var PageTaskExecutor = class {
769
785
  }
770
786
  };
771
787
  tasks.push(taskActionError);
788
+ } else if (plan2.type === "FalsyConditionStatement") {
789
+ const taskActionFalsyConditionStatement = {
790
+ type: "Action",
791
+ subType: "FalsyConditionStatement",
792
+ param: null,
793
+ thought: plan2.thought,
794
+ locate: plan2.locate,
795
+ executor: async () => {
796
+ }
797
+ };
798
+ tasks.push(taskActionFalsyConditionStatement);
772
799
  } else {
773
800
  throw new Error(`Unknown or unsupported task type: ${plan2.type}`);
774
801
  }
@@ -888,6 +915,9 @@ var PageTaskExecutor = class {
888
915
  const errorMsg = "Replanning too many times, please split the task into multiple steps";
889
916
  return this.appendErrorPlan(taskExecutor, errorMsg);
890
917
  }
918
+ if (replanCount > 0) {
919
+ await (0, import_utils3.sleep)(300);
920
+ }
891
921
  await taskExecutor.append(planningTask);
892
922
  const planResult = await taskExecutor.flush();
893
923
  if (taskExecutor.isInErrorState()) {
@@ -1573,15 +1603,31 @@ var Page = class {
1573
1603
  scrollUntilBottom() {
1574
1604
  return this.mouse.wheel(0, 9999999);
1575
1605
  }
1576
- async scrollUpOneScreen() {
1606
+ scrollUntilLeft() {
1607
+ return this.mouse.wheel(-9999999, 0);
1608
+ }
1609
+ scrollUntilRight() {
1610
+ return this.mouse.wheel(9999999, 0);
1611
+ }
1612
+ async scrollUp(distance) {
1577
1613
  const innerHeight = await this.evaluate(() => window.innerHeight);
1578
- const distance = innerHeight * 0.7;
1579
- await this.mouse.wheel(0, -distance);
1614
+ const scrollDistance = distance || innerHeight * 0.7;
1615
+ await this.mouse.wheel(0, -scrollDistance);
1580
1616
  }
1581
- async scrollDownOneScreen() {
1617
+ async scrollDown(distance) {
1582
1618
  const innerHeight = await this.evaluate(() => window.innerHeight);
1583
- const distance = innerHeight * 0.7;
1584
- await this.mouse.wheel(0, distance);
1619
+ const scrollDistance = distance || innerHeight * 0.7;
1620
+ await this.mouse.wheel(0, scrollDistance);
1621
+ }
1622
+ async scrollLeft(distance) {
1623
+ const innerWidth = await this.evaluate(() => window.innerWidth);
1624
+ const scrollDistance = distance || innerWidth * 0.7;
1625
+ await this.mouse.wheel(-scrollDistance, 0);
1626
+ }
1627
+ async scrollRight(distance) {
1628
+ const innerWidth = await this.evaluate(() => window.innerWidth);
1629
+ const scrollDistance = distance || innerWidth * 0.7;
1630
+ await this.mouse.wheel(scrollDistance, 0);
1585
1631
  }
1586
1632
  async destroy() {
1587
1633
  }
@@ -717,26 +717,42 @@ var PageTaskExecutor = class {
717
717
  param: plan2.param,
718
718
  thought: plan2.thought,
719
719
  locate: plan2.locate,
720
- executor: async (taskParam) => {
721
- const scrollToEventName = taskParam.scrollType;
722
- switch (scrollToEventName) {
723
- case "scrollUntilTop":
724
- await this.page.scrollUntilTop();
725
- break;
726
- case "scrollUntilBottom":
727
- await this.page.scrollUntilBottom();
728
- break;
729
- case "scrollUpOneScreen":
730
- await this.page.scrollUpOneScreen();
731
- break;
732
- case "scrollDownOneScreen":
733
- await this.page.scrollDownOneScreen();
734
- break;
735
- default:
736
- console.error(
737
- "Unknown scroll event type:",
738
- scrollToEventName
720
+ executor: async (taskParam, { element }) => {
721
+ if (element) {
722
+ await this.page.mouse.move(
723
+ element.center[0],
724
+ element.center[1]
725
+ );
726
+ }
727
+ const scrollToEventName = taskParam == null ? void 0 : taskParam.scrollType;
728
+ if (scrollToEventName === "untilTop") {
729
+ await this.page.scrollUntilTop();
730
+ } else if (scrollToEventName === "untilBottom") {
731
+ await this.page.scrollUntilBottom();
732
+ } else if (scrollToEventName === "untilRight") {
733
+ await this.page.scrollUntilRight();
734
+ } else if (scrollToEventName === "untilLeft") {
735
+ await this.page.scrollUntilLeft();
736
+ } else if (scrollToEventName === "once") {
737
+ if (taskParam.direction === "down") {
738
+ await this.page.scrollDown(taskParam.distance || void 0);
739
+ } else if (taskParam.direction === "up") {
740
+ await this.page.scrollUp(taskParam.distance || void 0);
741
+ } else if (taskParam.direction === "left") {
742
+ await this.page.scrollLeft(taskParam.distance || void 0);
743
+ } else if (taskParam.direction === "right") {
744
+ await this.page.scrollRight(taskParam.distance || void 0);
745
+ } else {
746
+ throw new Error(
747
+ `Unknown scroll direction: ${taskParam.direction}`
739
748
  );
749
+ }
750
+ } else {
751
+ throw new Error(
752
+ `Unknown scroll event type: ${scrollToEventName}, taskParam: ${JSON.stringify(
753
+ taskParam
754
+ )}`
755
+ );
740
756
  }
741
757
  }
742
758
  };
@@ -765,6 +781,17 @@ var PageTaskExecutor = class {
765
781
  }
766
782
  };
767
783
  tasks.push(taskActionError);
784
+ } else if (plan2.type === "FalsyConditionStatement") {
785
+ const taskActionFalsyConditionStatement = {
786
+ type: "Action",
787
+ subType: "FalsyConditionStatement",
788
+ param: null,
789
+ thought: plan2.thought,
790
+ locate: plan2.locate,
791
+ executor: async () => {
792
+ }
793
+ };
794
+ tasks.push(taskActionFalsyConditionStatement);
768
795
  } else {
769
796
  throw new Error(`Unknown or unsupported task type: ${plan2.type}`);
770
797
  }
@@ -884,6 +911,9 @@ var PageTaskExecutor = class {
884
911
  const errorMsg = "Replanning too many times, please split the task into multiple steps";
885
912
  return this.appendErrorPlan(taskExecutor, errorMsg);
886
913
  }
914
+ if (replanCount > 0) {
915
+ await (0, import_utils3.sleep)(300);
916
+ }
887
917
  await taskExecutor.append(planningTask);
888
918
  const planResult = await taskExecutor.flush();
889
919
  if (taskExecutor.isInErrorState()) {
@@ -1417,15 +1447,31 @@ var Page = class {
1417
1447
  scrollUntilBottom() {
1418
1448
  return this.mouse.wheel(0, 9999999);
1419
1449
  }
1420
- async scrollUpOneScreen() {
1450
+ scrollUntilLeft() {
1451
+ return this.mouse.wheel(-9999999, 0);
1452
+ }
1453
+ scrollUntilRight() {
1454
+ return this.mouse.wheel(9999999, 0);
1455
+ }
1456
+ async scrollUp(distance) {
1421
1457
  const innerHeight = await this.evaluate(() => window.innerHeight);
1422
- const distance = innerHeight * 0.7;
1423
- await this.mouse.wheel(0, -distance);
1458
+ const scrollDistance = distance || innerHeight * 0.7;
1459
+ await this.mouse.wheel(0, -scrollDistance);
1424
1460
  }
1425
- async scrollDownOneScreen() {
1461
+ async scrollDown(distance) {
1426
1462
  const innerHeight = await this.evaluate(() => window.innerHeight);
1427
- const distance = innerHeight * 0.7;
1428
- await this.mouse.wheel(0, distance);
1463
+ const scrollDistance = distance || innerHeight * 0.7;
1464
+ await this.mouse.wheel(0, scrollDistance);
1465
+ }
1466
+ async scrollLeft(distance) {
1467
+ const innerWidth = await this.evaluate(() => window.innerWidth);
1468
+ const scrollDistance = distance || innerWidth * 0.7;
1469
+ await this.mouse.wheel(-scrollDistance, 0);
1470
+ }
1471
+ async scrollRight(distance) {
1472
+ const innerWidth = await this.evaluate(() => window.innerWidth);
1473
+ const scrollDistance = distance || innerWidth * 0.7;
1474
+ await this.mouse.wheel(scrollDistance, 0);
1429
1475
  }
1430
1476
  async destroy() {
1431
1477
  }
@@ -28,7 +28,7 @@ function typeStr(task) {
28
28
  return task.subType ? `${task.type} / ${task.subType || ""}` : task.type;
29
29
  }
30
30
  function paramStr(task) {
31
- var _a, _b, _c, _d, _e, _f, _g, _h;
31
+ var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k;
32
32
  let value;
33
33
  if (task.type === "Planning") {
34
34
  value = (_a = task == null ? void 0 : task.param) == null ? void 0 : _a.userPrompt;
@@ -38,10 +38,15 @@ function paramStr(task) {
38
38
  }
39
39
  if (task.type === "Action") {
40
40
  const sleepMs = (_f = task == null ? void 0 : task.param) == null ? void 0 : _f.timeMs;
41
+ const scrollType = (_g = task == null ? void 0 : task.param) == null ? void 0 : _g.scrollType;
41
42
  if (sleepMs) {
42
43
  value = `${sleepMs}ms`;
44
+ } else if (scrollType) {
45
+ const scrollDirection = (_h = task == null ? void 0 : task.param) == null ? void 0 : _h.direction;
46
+ const scrollDistance = (_i = task == null ? void 0 : task.param) == null ? void 0 : _i.distance;
47
+ value = `${scrollDirection}, ${scrollType}, ${scrollDistance || "distance-not-set"}`;
43
48
  } else {
44
- value = ((_g = task == null ? void 0 : task.param) == null ? void 0 : _g.value) || ((_h = task == null ? void 0 : task.param) == null ? void 0 : _h.scrollType);
49
+ value = ((_j = task == null ? void 0 : task.param) == null ? void 0 : _j.value) || ((_k = task == null ? void 0 : task.param) == null ? void 0 : _k.scrollType);
45
50
  }
46
51
  if (!value) {
47
52
  value = task.thought;
@@ -1235,26 +1235,42 @@ var PageTaskExecutor = class {
1235
1235
  param: plan2.param,
1236
1236
  thought: plan2.thought,
1237
1237
  locate: plan2.locate,
1238
- executor: async (taskParam) => {
1239
- const scrollToEventName = taskParam.scrollType;
1240
- switch (scrollToEventName) {
1241
- case "scrollUntilTop":
1242
- await this.page.scrollUntilTop();
1243
- break;
1244
- case "scrollUntilBottom":
1245
- await this.page.scrollUntilBottom();
1246
- break;
1247
- case "scrollUpOneScreen":
1248
- await this.page.scrollUpOneScreen();
1249
- break;
1250
- case "scrollDownOneScreen":
1251
- await this.page.scrollDownOneScreen();
1252
- break;
1253
- default:
1254
- console.error(
1255
- "Unknown scroll event type:",
1256
- scrollToEventName
1238
+ executor: async (taskParam, { element }) => {
1239
+ if (element) {
1240
+ await this.page.mouse.move(
1241
+ element.center[0],
1242
+ element.center[1]
1243
+ );
1244
+ }
1245
+ const scrollToEventName = taskParam == null ? void 0 : taskParam.scrollType;
1246
+ if (scrollToEventName === "untilTop") {
1247
+ await this.page.scrollUntilTop();
1248
+ } else if (scrollToEventName === "untilBottom") {
1249
+ await this.page.scrollUntilBottom();
1250
+ } else if (scrollToEventName === "untilRight") {
1251
+ await this.page.scrollUntilRight();
1252
+ } else if (scrollToEventName === "untilLeft") {
1253
+ await this.page.scrollUntilLeft();
1254
+ } else if (scrollToEventName === "once") {
1255
+ if (taskParam.direction === "down") {
1256
+ await this.page.scrollDown(taskParam.distance || void 0);
1257
+ } else if (taskParam.direction === "up") {
1258
+ await this.page.scrollUp(taskParam.distance || void 0);
1259
+ } else if (taskParam.direction === "left") {
1260
+ await this.page.scrollLeft(taskParam.distance || void 0);
1261
+ } else if (taskParam.direction === "right") {
1262
+ await this.page.scrollRight(taskParam.distance || void 0);
1263
+ } else {
1264
+ throw new Error(
1265
+ `Unknown scroll direction: ${taskParam.direction}`
1257
1266
  );
1267
+ }
1268
+ } else {
1269
+ throw new Error(
1270
+ `Unknown scroll event type: ${scrollToEventName}, taskParam: ${JSON.stringify(
1271
+ taskParam
1272
+ )}`
1273
+ );
1258
1274
  }
1259
1275
  }
1260
1276
  };
@@ -1283,6 +1299,17 @@ var PageTaskExecutor = class {
1283
1299
  }
1284
1300
  };
1285
1301
  tasks.push(taskActionError);
1302
+ } else if (plan2.type === "FalsyConditionStatement") {
1303
+ const taskActionFalsyConditionStatement = {
1304
+ type: "Action",
1305
+ subType: "FalsyConditionStatement",
1306
+ param: null,
1307
+ thought: plan2.thought,
1308
+ locate: plan2.locate,
1309
+ executor: async () => {
1310
+ }
1311
+ };
1312
+ tasks.push(taskActionFalsyConditionStatement);
1286
1313
  } else {
1287
1314
  throw new Error(`Unknown or unsupported task type: ${plan2.type}`);
1288
1315
  }
@@ -1402,6 +1429,9 @@ var PageTaskExecutor = class {
1402
1429
  const errorMsg = "Replanning too many times, please split the task into multiple steps";
1403
1430
  return this.appendErrorPlan(taskExecutor, errorMsg);
1404
1431
  }
1432
+ if (replanCount > 0) {
1433
+ await (0, import_utils3.sleep)(300);
1434
+ }
1405
1435
  await taskExecutor.append(planningTask);
1406
1436
  const planResult = await taskExecutor.flush();
1407
1437
  if (taskExecutor.isInErrorState()) {
@@ -2116,24 +2146,48 @@ var Page = class {
2116
2146
  return "";
2117
2147
  }
2118
2148
  // Scroll to top element
2119
- async scrollUntilTop() {
2149
+ async scrollUntilTop(distance) {
2120
2150
  const { height } = await this.browser.getWindowSize();
2121
- await this.mouseWheel(0, height, 100);
2151
+ const scrollDistance = distance || height * 0.7;
2152
+ await this.mouseWheel(0, -scrollDistance, 100);
2122
2153
  }
2123
2154
  // Scroll to bottom element
2124
- async scrollUntilBottom() {
2155
+ async scrollUntilBottom(distance) {
2125
2156
  const { height } = await this.browser.getWindowSize();
2126
- await this.mouseWheel(0, -height, 100);
2157
+ const scrollDistance = distance || height * 0.7;
2158
+ await this.mouseWheel(0, scrollDistance, 100);
2159
+ }
2160
+ async scrollUntilLeft(distance) {
2161
+ const { width } = await this.browser.getWindowSize();
2162
+ const scrollDistance = distance || width * 0.7;
2163
+ await this.mouseWheel(-scrollDistance, 0, 100);
2164
+ }
2165
+ async scrollUntilRight(distance) {
2166
+ const { width } = await this.browser.getWindowSize();
2167
+ const scrollDistance = distance || width * 0.7;
2168
+ await this.mouseWheel(scrollDistance, 0, 100);
2127
2169
  }
2128
2170
  // Scroll up one screen
2129
- async scrollUpOneScreen() {
2171
+ async scrollUp(distance) {
2130
2172
  const { height } = await this.browser.getWindowSize();
2131
- await this.mouseWheel(0, height, 1e3);
2173
+ const scrollDistance = distance || height * 0.7;
2174
+ await this.mouseWheel(0, -scrollDistance, 1e3);
2132
2175
  }
2133
2176
  // Scroll down one screen
2134
- async scrollDownOneScreen() {
2177
+ async scrollDown(distance) {
2135
2178
  const { height } = await this.browser.getWindowSize();
2136
- await this.mouseWheel(0, -height, 1e3);
2179
+ const scrollDistance = distance || height * 0.7;
2180
+ await this.mouseWheel(0, scrollDistance, 1e3);
2181
+ }
2182
+ async scrollLeft(distance) {
2183
+ const { width } = await this.browser.getWindowSize();
2184
+ const scrollDistance = distance || width * 0.7;
2185
+ await this.mouseWheel(-scrollDistance, 0, 1e3);
2186
+ }
2187
+ async scrollRight(distance) {
2188
+ const { width } = await this.browser.getWindowSize();
2189
+ const scrollDistance = distance || width * 0.7;
2190
+ await this.mouseWheel(scrollDistance, 0, 1e3);
2137
2191
  }
2138
2192
  async keyboardType(text) {
2139
2193
  const actions = [];