@midscene/web 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/lib/index.js CHANGED
@@ -346,7 +346,8 @@ var require_dayjs_min = __commonJS({
346
346
  var src_exports = {};
347
347
  __export(src_exports, {
348
348
  PlaywrightAiFixture: () => PlaywrightAiFixture,
349
- PuppeteerAgent: () => PageAgent
349
+ PuppeteerAgent: () => PageAgent,
350
+ generateExtractData: () => generateExtractData
350
351
  });
351
352
  module.exports = __toCommonJS(src_exports);
352
353
 
@@ -486,17 +487,16 @@ async function alignElements(screenshotBuffer, elements, page) {
486
487
  return item.rect.height >= sizeThreshold && item.rect.width >= sizeThreshold;
487
488
  });
488
489
  for (const item of validElements) {
489
- const { rect } = item;
490
- const aligned = await (0, import_image.alignCoordByTrim)(screenshotBuffer, rect);
491
- item.rect = aligned;
492
- item.center = [
493
- Math.round(aligned.left + aligned.width / 2),
494
- Math.round(aligned.top + aligned.height / 2)
495
- ];
490
+ const { rect, id, content, attributes, locator } = item;
496
491
  textsAligned.push(
497
- new WebElementInfo(__spreadProps(__spreadValues({}, item), {
492
+ new WebElementInfo({
493
+ rect,
494
+ locator,
495
+ id,
496
+ content,
497
+ attributes,
498
498
  page
499
- }))
499
+ })
500
500
  );
501
501
  }
502
502
  return textsAligned;
@@ -620,7 +620,7 @@ var PageTaskExecutor = class {
620
620
  };
621
621
  return taskFind;
622
622
  }
623
- if (plan2.type === "Assert") {
623
+ if (plan2.type === "Assert" || plan2.type === "AssertWithoutThrow") {
624
624
  const assertPlan = plan2;
625
625
  const taskAssert = {
626
626
  type: "Insight",
@@ -637,13 +637,16 @@ var PageTaskExecutor = class {
637
637
  assertPlan.param.assertion
638
638
  );
639
639
  if (!assertion.pass) {
640
- task.output = assertion;
641
- task.log = {
642
- dump: insightDump
643
- };
644
- throw new Error(
645
- assertion.thought || "Assertion failed without reason"
646
- );
640
+ if (plan2.type === "Assert") {
641
+ task.output = assertion;
642
+ task.log = {
643
+ dump: insightDump
644
+ };
645
+ throw new Error(
646
+ assertion.thought || "Assertion failed without reason"
647
+ );
648
+ }
649
+ task.error = assertion.thought;
647
650
  }
648
651
  return {
649
652
  output: assertion,
@@ -759,7 +762,19 @@ var PageTaskExecutor = class {
759
762
  return taskActionSleep;
760
763
  }
761
764
  if (plan2.type === "Error") {
762
- throw new Error(`Got a task plan with type Error: ${plan2.thought}`);
765
+ const taskActionError = {
766
+ type: "Action",
767
+ subType: "Error",
768
+ param: plan2.param,
769
+ executor: async (taskParam) => {
770
+ (0, import_node_assert2.default)(
771
+ taskParam.thought,
772
+ "An error occurred, but no thought provided"
773
+ );
774
+ throw new Error(taskParam.thought);
775
+ }
776
+ };
777
+ return taskActionError;
763
778
  }
764
779
  throw new Error(`Unknown or Unsupported task type: ${plan2.type}`);
765
780
  }).map((task) => {
@@ -769,7 +784,6 @@ var PageTaskExecutor = class {
769
784
  }
770
785
  async action(userPrompt) {
771
786
  const taskExecutor = new import_core.Executor(userPrompt);
772
- taskExecutor.description = userPrompt;
773
787
  let plans = [];
774
788
  const planningTask = {
775
789
  type: "Planning",
@@ -829,7 +843,6 @@ var PageTaskExecutor = class {
829
843
  async query(demand) {
830
844
  const description = typeof demand === "string" ? demand : JSON.stringify(demand);
831
845
  const taskExecutor = new import_core.Executor(description);
832
- taskExecutor.description = description;
833
846
  const queryTask = {
834
847
  type: "Insight",
835
848
  subType: "Query",
@@ -857,9 +870,8 @@ var PageTaskExecutor = class {
857
870
  };
858
871
  }
859
872
  async assert(assertion) {
860
- const description = assertion;
873
+ const description = `assert: ${assertion}`;
861
874
  const taskExecutor = new import_core.Executor(description);
862
- taskExecutor.description = description;
863
875
  const assertionPlan = {
864
876
  type: "Assert",
865
877
  param: {
@@ -874,6 +886,64 @@ var PageTaskExecutor = class {
874
886
  executor: taskExecutor
875
887
  };
876
888
  }
889
+ async waitFor(assertion, opt) {
890
+ const description = `waitFor: ${assertion}`;
891
+ const taskExecutor = new import_core.Executor(description);
892
+ const { timeoutMs, checkIntervalMs } = opt;
893
+ (0, import_node_assert2.default)(assertion, "No assertion for waitFor");
894
+ (0, import_node_assert2.default)(timeoutMs, "No timeoutMs for waitFor");
895
+ (0, import_node_assert2.default)(checkIntervalMs, "No checkIntervalMs for waitFor");
896
+ const overallStartTime = Date.now();
897
+ let startTime = Date.now();
898
+ let errorThought = "";
899
+ while (Date.now() - overallStartTime < timeoutMs) {
900
+ startTime = Date.now();
901
+ const assertPlan = {
902
+ type: "AssertWithoutThrow",
903
+ param: {
904
+ assertion
905
+ }
906
+ };
907
+ const assertTask = await this.convertPlanToExecutable([assertPlan]);
908
+ await taskExecutor.append(this.wrapExecutorWithScreenshot(assertTask[0]));
909
+ const output = await taskExecutor.flush();
910
+ if (output.pass) {
911
+ return {
912
+ output: void 0,
913
+ executor: taskExecutor
914
+ };
915
+ }
916
+ errorThought = output.thought;
917
+ const now = Date.now();
918
+ if (now - startTime < checkIntervalMs) {
919
+ const timeRemaining = checkIntervalMs - (now - startTime);
920
+ const sleepPlan = {
921
+ type: "Sleep",
922
+ param: {
923
+ timeMs: timeRemaining
924
+ }
925
+ };
926
+ const sleepTask = await this.convertPlanToExecutable([sleepPlan]);
927
+ await taskExecutor.append(
928
+ this.wrapExecutorWithScreenshot(sleepTask[0])
929
+ );
930
+ await taskExecutor.flush();
931
+ }
932
+ }
933
+ const errorPlan = {
934
+ type: "Error",
935
+ param: {
936
+ thought: `waitFor timeout: ${errorThought}`
937
+ }
938
+ };
939
+ const errorTask = await this.convertPlanToExecutable([errorPlan]);
940
+ await taskExecutor.append(errorTask[0]);
941
+ await taskExecutor.flush();
942
+ return {
943
+ output: void 0,
944
+ executor: taskExecutor
945
+ };
946
+ }
877
947
  };
878
948
 
879
949
  // src/common/agent.ts
@@ -883,6 +953,7 @@ var PageAgent = class {
883
953
  this.opts = Object.assign(
884
954
  {
885
955
  generateReport: true,
956
+ autoPrintReportMsg: true,
886
957
  groupName: "Midscene Report",
887
958
  groupDescription: ""
888
959
  },
@@ -908,7 +979,7 @@ var PageAgent = class {
908
979
  return (0, import_utils4.stringifyDumpData)(this.dump);
909
980
  }
910
981
  writeOutActionDumps() {
911
- const generateReport = this.opts.generateReport;
982
+ const { generateReport, autoPrintReportMsg } = this.opts;
912
983
  this.reportFile = (0, import_utils4.writeLogFile)({
913
984
  fileName: this.reportFileName,
914
985
  fileExt: import_utils4.groupedActionDumpFileExt,
@@ -916,7 +987,7 @@ var PageAgent = class {
916
987
  type: "dump",
917
988
  generateReport
918
989
  });
919
- if (generateReport) {
990
+ if (generateReport && autoPrintReportMsg) {
920
991
  printReportMsg(this.reportFile);
921
992
  }
922
993
  }
@@ -952,6 +1023,20 @@ ${errorTask == null ? void 0 : errorTask.errorStack}`);
952
1023
  ${reasonMsg}`);
953
1024
  }
954
1025
  }
1026
+ async aiWaitFor(assertion, opt) {
1027
+ const { executor } = await this.taskExecutor.waitFor(assertion, {
1028
+ timeoutMs: (opt == null ? void 0 : opt.timeoutMs) || 15 * 1e3,
1029
+ checkIntervalMs: (opt == null ? void 0 : opt.checkIntervalMs) || 3 * 1e3,
1030
+ assertion
1031
+ });
1032
+ this.appendExecutionDump(executor.dump());
1033
+ this.writeOutActionDumps();
1034
+ if (executor.isInErrorState()) {
1035
+ const errorTask = executor.latestErrorTask();
1036
+ throw new Error(`${errorTask == null ? void 0 : errorTask.error}
1037
+ ${errorTask == null ? void 0 : errorTask.errorStack}`);
1038
+ }
1039
+ }
955
1040
  async ai(taskPrompt, type = "action") {
956
1041
  if (type === "action") {
957
1042
  return this.aiAction(taskPrompt);
@@ -968,6 +1053,9 @@ ${reasonMsg}`);
968
1053
  }
969
1054
  };
970
1055
 
1056
+ // src/playwright/index.ts
1057
+ var import_test = require("@playwright/test");
1058
+
971
1059
  // src/playwright/cache.ts
972
1060
  var import_node_fs2 = __toESM(require("fs"));
973
1061
  var import_node_path2 = __toESM(require("path"));
@@ -1069,14 +1157,14 @@ var PlaywrightAiFixture = () => {
1069
1157
  }
1070
1158
  return pageAgentMap[idForPage];
1071
1159
  };
1072
- const updateDumpAnnotation = (test, dump) => {
1073
- const currentAnnotation = test.annotations.find((item) => {
1160
+ const updateDumpAnnotation = (test2, dump) => {
1161
+ const currentAnnotation = test2.annotations.find((item) => {
1074
1162
  return item.type === midsceneDumpAnnotationId;
1075
1163
  });
1076
1164
  if (currentAnnotation) {
1077
1165
  currentAnnotation.description = dump;
1078
1166
  } else {
1079
- test.annotations.push({
1167
+ test2.annotations.push({
1080
1168
  type: midsceneDumpAnnotationId,
1081
1169
  description: dump
1082
1170
  });
@@ -1088,10 +1176,14 @@ var PlaywrightAiFixture = () => {
1088
1176
  const agent = agentForPage(page, testInfo);
1089
1177
  await use(
1090
1178
  async (taskPrompt, opts) => {
1091
- await page.waitForLoadState("networkidle");
1092
- const actionType = (opts == null ? void 0 : opts.type) || "action";
1093
- const result = await agent.ai(taskPrompt, actionType);
1094
- return result;
1179
+ return new Promise((resolve, reject) => {
1180
+ import_test.test.step(`ai - ${taskPrompt}`, async () => {
1181
+ await page.waitForLoadState("networkidle");
1182
+ const actionType = (opts == null ? void 0 : opts.type) || "action";
1183
+ const result = await agent.ai(taskPrompt, actionType);
1184
+ resolve(result);
1185
+ });
1186
+ });
1095
1187
  }
1096
1188
  );
1097
1189
  const taskCacheJson = agent.taskExecutor.taskCache.generateTaskCache();
@@ -1102,32 +1194,244 @@ var PlaywrightAiFixture = () => {
1102
1194
  const { taskFile, taskTitle } = groupAndCaseForTest(testInfo);
1103
1195
  const agent = agentForPage(page, testInfo);
1104
1196
  await use(async (taskPrompt) => {
1105
- await page.waitForLoadState("networkidle");
1106
- await agent.aiAction(taskPrompt);
1197
+ import_test.test.step(`aiAction - ${taskPrompt}`, async () => {
1198
+ await page.waitForLoadState("networkidle");
1199
+ await agent.aiAction(taskPrompt);
1200
+ });
1107
1201
  });
1108
1202
  updateDumpAnnotation(testInfo, agent.dumpDataString());
1109
1203
  },
1110
1204
  aiQuery: async ({ page }, use, testInfo) => {
1111
1205
  const agent = agentForPage(page, testInfo);
1112
1206
  await use(async (demand) => {
1113
- await page.waitForLoadState("networkidle");
1114
- const result = await agent.aiQuery(demand);
1115
- return result;
1207
+ return new Promise((resolve, reject) => {
1208
+ import_test.test.step(`aiQuery - ${JSON.stringify(demand)}`, async () => {
1209
+ await page.waitForLoadState("networkidle");
1210
+ const result = await agent.aiQuery(demand);
1211
+ resolve(result);
1212
+ });
1213
+ });
1116
1214
  });
1117
1215
  updateDumpAnnotation(testInfo, agent.dumpDataString());
1118
1216
  },
1119
1217
  aiAssert: async ({ page }, use, testInfo) => {
1120
1218
  const agent = agentForPage(page, testInfo);
1121
1219
  await use(async (assertion, errorMsg) => {
1122
- await page.waitForLoadState("networkidle");
1123
- await agent.aiAssert(assertion, errorMsg);
1220
+ return new Promise((resolve, reject) => {
1221
+ import_test.test.step(`aiAssert - ${assertion}`, async () => {
1222
+ await page.waitForLoadState("networkidle");
1223
+ await agent.aiAssert(assertion, errorMsg);
1224
+ resolve(null);
1225
+ });
1226
+ });
1227
+ });
1228
+ updateDumpAnnotation(testInfo, agent.dumpDataString());
1229
+ },
1230
/**
 * Playwright fixture that provides `aiWaitFor(assertion, opt)` to the test.
 *
 * The wait runs inside a named `test.step`; the returned promise resolves
 * with `null` once the agent's wait completes.
 */
aiWaitFor: async ({ page }, use, testInfo) => {
  const agent = agentForPage(page, testInfo);
  await use(async (assertion, opt) => {
    // Return the step's own promise so that a timeout or error rejects the
    // caller's await instead of leaving a never-settled wrapper promise.
    return import_test.test.step(`aiWaitFor - ${assertion}`, async () => {
      await agent.aiWaitFor(assertion, opt);
      return null;
    });
  });
  updateDumpAnnotation(testInfo, agent.dumpDataString());
}
1127
1242
  };
1128
1243
  };
1244
+
1245
+ // src/debug/index.ts
1246
+ var import_node_fs3 = require("fs");
1247
+ var import_node_path3 = __toESM(require("path"));
1248
+
1249
+ // src/img/img.ts
1250
+ var import_node_assert3 = __toESM(require("assert"));
1251
+ var import_node_buffer = require("buffer");
1252
+ var import_sharp = __toESM(require("sharp"));
1253
/**
 * Builds an SVG overlay that outlines each element and tags it with a label badge.
 *
 * For every element a clip path, a stroked outline rectangle, a filled badge
 * rectangle and the label text are emitted. The badge sits to the left of the
 * element, vertically centred; if that would fall off the left edge it is
 * placed above the element's top-left corner instead. Colours alternate
 * between blue and green by element index.
 *
 * @param {Array<{x: number, y: number, width: number, height: number, label: string}>} elements
 *   Boxes to draw, in image pixel coordinates.
 * @param {number} imageWidth - Width of the SVG canvas.
 * @param {number} imageHeight - Height of the SVG canvas.
 * @returns {Buffer} The SVG document as a buffer.
 */
var createSvgOverlay = (elements, imageWidth, imageHeight) => {
  const colors = [
    { rect: "blue", text: "white" },
    { rect: "green", text: "white" }
  ];
  // Labels end up as XML text content; unescaped "&" or "<" would make the
  // whole overlay unparsable.
  const escapeXml = (value) => value.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");

  let svgContent = `<svg width="${imageWidth}" height="${imageHeight}" xmlns="http://www.w3.org/2000/svg">`;

  svgContent += "<defs>";
  elements.forEach((element, index) => {
    svgContent += `
      <clipPath id="clip${index}">
        <rect x="${element.x}" y="${element.y}" width="${element.width}" height="${element.height}" />
      </clipPath>
    `;
  });
  svgContent += "</defs>";

  elements.forEach((element, index) => {
    // Coerce so that numeric labels still yield a finite badge width.
    const label = String(element.label);
    // Rough text metrics: 8px per character at a 12px font size.
    const textWidth = label.length * 8;
    const textHeight = 12;
    const rectWidth = textWidth + 5;
    const rectHeight = textHeight + 4;

    let rectX = element.x - rectWidth;
    let rectY = element.y + element.height / 2 - textHeight / 2 - 2;
    if (rectX < 0) {
      // No room on the left: move the badge above the element instead.
      rectX = element.x;
      rectY = element.y - rectHeight;
    }
    const textX = rectX + rectWidth / 2;
    const textY = rectY + rectHeight / 2 + 6;

    const color = colors[index % colors.length];
    svgContent += `
      <rect x="${element.x}" y="${element.y}" width="${element.width}" height="${element.height}"
        style="fill:none;stroke:${color.rect};stroke-width:4" clip-path="url(#clip${index})" />
      <rect x="${rectX}" y="${rectY}" width="${rectWidth}" height="${rectHeight}" style="fill:${color.rect};" />
      <text x="${textX}" y="${textY}"
        text-anchor="middle" dominant-baseline="middle" style="fill:${color.text};font-size:12px;font-weight:bold;">
        ${escapeXml(label)}
      </text>
    `;
  });

  svgContent += "</svg>";
  return import_node_buffer.Buffer.from(svgContent);
};
1297
/**
 * Draws element overlays onto a screenshot and returns the results as base64.
 *
 * Two composites are produced from the same input image: one using
 * `options.elementsPositionInfo` and one using
 * `options.elementsPositionInfoWithoutText`.
 *
 * @param {object} options
 * @param {string} options.inputImgBase64 - Image as base64, with or without a
 *   `data:...;base64,` prefix (everything after the last ";base64," is used).
 * @param {Array<object>} options.elementsPositionInfo - Boxes for the full overlay.
 * @param {Array<object>} options.elementsPositionInfoWithoutText - Boxes for the second overlay.
 * @returns {Promise<{compositeElementInfoImgBase64: string, compositeElementInfoImgWithoutTextBase64: string}>}
 * @throws {AssertionError} When no base64 payload can be extracted.
 * @throws {Error} When the image metadata has no width or height.
 */
var processImageElementInfo = async (options) => {
  const base64Image = options.inputImgBase64.split(";base64,").pop();
  (0, import_node_assert3.default)(base64Image, "base64Image is undefined");
  const imageBuffer = import_node_buffer.Buffer.from(base64Image, "base64");

  const { width, height } = await (0, import_sharp.default)(imageBuffer).metadata();
  if (!width || !height) {
    throw new Error("Image processing failed because width or height is undefined");
  }

  // Render one overlay on top of the source image; errors propagate as-is.
  const compositeToBase64 = async (elements) => {
    const overlay = createSvgOverlay(elements, width, height);
    const composed = await (0, import_sharp.default)(imageBuffer).composite([{ input: overlay, blend: "over" }]).toBuffer();
    return composed.toString("base64");
  };

  // The two composites are independent, so they can run concurrently.
  const [
    compositeElementInfoImgBase64,
    compositeElementInfoImgWithoutTextBase64
  ] = await Promise.all([
    compositeToBase64(options.elementsPositionInfo),
    compositeToBase64(options.elementsPositionInfoWithoutText)
  ]);

  return {
    compositeElementInfoImgBase64,
    compositeElementInfoImgWithoutTextBase64
  };
};
1331
+
1332
+ // src/img/util.ts
1333
/**
 * Collects element data from the page and derives overlay boxes from it.
 *
 * @param {object} page - Page handle passed through to `getElementInfosFromPage`.
 * @returns {Promise<{elementsPositionInfo: Array<object>, captureElementSnapshot: Array<object>, elementsPositionInfoWithoutText: Array<object>}>}
 *   The raw snapshot, one labelled box per snapshot entry, and the same boxes
 *   with text nodes removed.
 */
async function getElementInfos(page) {
  const captureElementSnapshot = await getElementInfosFromPage(page);

  // One box per captured element, labelled with its index id.
  const elementsPositionInfo = captureElementSnapshot.map((elementInfo) => ({
    label: elementInfo.indexId.toString(),
    x: elementInfo.rect.left,
    y: elementInfo.rect.top,
    width: elementInfo.rect.width,
    height: elementInfo.rect.height,
    attributes: elementInfo.attributes
  }));

  // Same boxes minus the entries whose node type marks them as text nodes.
  const elementsPositionInfoWithoutText = elementsPositionInfo.filter(
    (box) => !(box.attributes.nodeType === "TEXT Node" /* TEXT */)
  );

  return {
    elementsPositionInfo,
    captureElementSnapshot,
    elementsPositionInfoWithoutText
  };
}
1359
+
1360
+ // src/debug/index.ts
1361
+ var import_image3 = require("@midscene/core/image");
1362
/**
 * Captures a page screenshot plus element data and writes debug artifacts
 * into `targetDir`.
 *
 * Files written (each can be suppressed through a flag on `saveImgType`):
 * `element-snapshot.json`, `input.png`, `output.png`,
 * `output_without_text.png` and `resize-output.png`.
 *
 * @param {object} page - Page handle offering `screenshot()`.
 * @param {string} targetDir - Directory that receives the artifacts.
 * @param {object} [saveImgType] - Optional flags: `disableSnapshot`,
 *   `disableInputImage`, `disableOutputImage`, `disableOutputWithoutTextImg`,
 *   `disableResizeOutputImg`.
 * @returns {Promise<void>}
 */
async function generateExtractData(page, targetDir, saveImgType) {
  const screenshot = await page.screenshot({
    encoding: "base64"
  });
  // NOTE(review): presumably a no-op when the driver already returns a base64
  // string, and a real conversion when it returns a Buffer — confirm.
  const inputImgBase64 = screenshot.toString("base64");

  const {
    elementsPositionInfo,
    captureElementSnapshot,
    elementsPositionInfoWithoutText
  } = await getElementInfos(page);

  const inTargetDir = (fileName) => import_node_path3.default.join(targetDir, fileName);
  const inputImagePath = inTargetDir("input.png");
  const outputImagePath = inTargetDir("output.png");
  const outputWithoutTextImgPath = inTargetDir("output_without_text.png");
  const resizeOutputImgPath = inTargetDir("resize-output.png");
  const snapshotJsonPath = inTargetDir("element-snapshot.json");

  const composites = await processImageElementInfo({
    elementsPositionInfo,
    elementsPositionInfoWithoutText,
    inputImgBase64
  });
  const {
    compositeElementInfoImgBase64,
    compositeElementInfoImgWithoutTextBase64
  } = composites;
  const resizeImgBase64 = await (0, import_image3.resizeImg)(inputImgBase64);

  if (!(saveImgType == null ? void 0 : saveImgType.disableSnapshot)) {
    writeFileSyncWithDir(
      snapshotJsonPath,
      JSON.stringify(captureElementSnapshot, null, 2)
    );
  }

  // Images are saved one after another, each guarded by its own disable flag.
  const imageOutputs = [
    ["disableInputImage", inputImgBase64, inputImagePath],
    ["disableOutputImage", compositeElementInfoImgBase64, outputImagePath],
    ["disableOutputWithoutTextImg", compositeElementInfoImgWithoutTextBase64, outputWithoutTextImgPath],
    ["disableResizeOutputImg", resizeImgBase64, resizeOutputImgPath]
  ];
  for (const [disableFlag, base64Data, outputPath] of imageOutputs) {
    if (saveImgType == null ? void 0 : saveImgType[disableFlag]) {
      continue;
    }
    await (0, import_image3.saveBase64Image)({
      base64Data,
      outputPath
    });
  }
}
1420
/**
 * Makes sure the parent directory of `filePath` exists, creating any missing
 * ancestors along the way.
 *
 * @param {string} filePath - Path of the file about to be written.
 */
function ensureDirectoryExistence(filePath) {
  const dirname2 = import_node_path3.default.dirname(filePath);
  // `recursive: true` creates the whole chain in one call and does not fail
  // when the directory already exists, which avoids the check-then-create
  // race of testing for existence first.
  (0, import_node_fs3.mkdirSync)(dirname2, { recursive: true });
}

/**
 * Writes a file synchronously, creating its parent directories first.
 *
 * @param {string} filePath - Destination path.
 * @param {string|Buffer} content - Data to write.
 * @param {object} [options={}] - Options forwarded to `writeFileSync`.
 */
function writeFileSyncWithDir(filePath, content, options = {}) {
  ensureDirectoryExistence(filePath);
  (0, import_node_fs3.writeFileSync)(filePath, content, options);
}
1129
1432
  // Annotate the CommonJS export names for ESM import in node:
1130
1433
  0 && (module.exports = {
1131
1434
  PlaywrightAiFixture,
1132
- PuppeteerAgent
1435
+ PuppeteerAgent,
1436
+ generateExtractData
1133
1437
  });