@midscene/web 0.3.1-beta-20240821105917.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/es/index.js CHANGED
@@ -21,7 +21,7 @@ var __spreadValues = (a, b) => {
21
21
  return a;
22
22
  };
23
23
  var __spreadProps = (a, b) => __defProps(a, __getOwnPropDescs(b));
24
- var __commonJS = (cb, mod) => function __require() {
24
+ var __commonJS = (cb, mod) => function __require2() {
25
25
  return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
26
26
  };
27
27
  var __copyProps = (to, from, except, desc) => {
@@ -416,7 +416,6 @@ import assert from "assert";
416
416
  import fs, { readFileSync } from "fs";
417
417
  import path from "path";
418
418
  import {
419
- alignCoordByTrim,
420
419
  base64Encoded,
421
420
  imageInfoOfBase64
422
421
  } from "@midscene/core/image";
@@ -484,14 +483,6 @@ async function alignElements(screenshotBuffer, elements, page) {
484
483
  });
485
484
  for (const item of validElements) {
486
485
  const { rect, id, content, attributes, locator } = item;
487
- const aligned = await alignCoordByTrim(screenshotBuffer, rect);
488
- if (aligned.width < 0)
489
- continue;
490
- item.rect = aligned;
491
- item.center = [
492
- Math.round(aligned.left + aligned.width / 2),
493
- Math.round(aligned.top + aligned.height / 2)
494
- ];
495
486
  textsAligned.push(
496
487
  new WebElementInfo({
497
488
  rect,
@@ -1022,7 +1013,7 @@ ${errorTask == null ? void 0 : errorTask.errorStack}`);
1022
1013
  this.writeOutActionDumps();
1023
1014
  if (!(output == null ? void 0 : output.pass)) {
1024
1015
  const errMsg = msg || `Assertion failed: ${assertion}`;
1025
- const reasonMsg = `Reason: ${(output == null ? void 0 : output.thought) || "(no_reason)"}`;
1016
+ const reasonMsg = `Reason: ${output == null ? void 0 : output.thought} || (no_reason)`;
1026
1017
  throw new Error(`${errMsg}
1027
1018
  ${reasonMsg}`);
1028
1019
  }
@@ -1057,6 +1048,9 @@ ${errorTask == null ? void 0 : errorTask.errorStack}`);
1057
1048
  }
1058
1049
  };
1059
1050
 
1051
+ // src/playwright/index.ts
1052
+ import { test } from "@playwright/test";
1053
+
1060
1054
  // src/playwright/cache.ts
1061
1055
  import fs2 from "fs";
1062
1056
  import path2, { join } from "path";
@@ -1162,14 +1156,14 @@ var PlaywrightAiFixture = () => {
1162
1156
  }
1163
1157
  return pageAgentMap[idForPage];
1164
1158
  };
1165
- const updateDumpAnnotation = (test, dump) => {
1166
- const currentAnnotation = test.annotations.find((item) => {
1159
+ const updateDumpAnnotation = (test2, dump) => {
1160
+ const currentAnnotation = test2.annotations.find((item) => {
1167
1161
  return item.type === midsceneDumpAnnotationId;
1168
1162
  });
1169
1163
  if (currentAnnotation) {
1170
1164
  currentAnnotation.description = dump;
1171
1165
  } else {
1172
- test.annotations.push({
1166
+ test2.annotations.push({
1173
1167
  type: midsceneDumpAnnotationId,
1174
1168
  description: dump
1175
1169
  });
@@ -1181,10 +1175,14 @@ var PlaywrightAiFixture = () => {
1181
1175
  const agent = agentForPage(page, testInfo);
1182
1176
  await use(
1183
1177
  async (taskPrompt, opts) => {
1184
- await page.waitForLoadState("networkidle");
1185
- const actionType = (opts == null ? void 0 : opts.type) || "action";
1186
- const result = await agent.ai(taskPrompt, actionType);
1187
- return result;
1178
+ return new Promise((resolve, reject) => {
1179
+ test.step(`ai - ${taskPrompt}`, async () => {
1180
+ await page.waitForLoadState("networkidle");
1181
+ const actionType = (opts == null ? void 0 : opts.type) || "action";
1182
+ const result = await agent.ai(taskPrompt, actionType);
1183
+ resolve(result);
1184
+ });
1185
+ });
1188
1186
  }
1189
1187
  );
1190
1188
  const taskCacheJson = agent.taskExecutor.taskCache.generateTaskCache();
@@ -1195,38 +1193,243 @@ var PlaywrightAiFixture = () => {
1195
1193
  const { taskFile, taskTitle } = groupAndCaseForTest(testInfo);
1196
1194
  const agent = agentForPage(page, testInfo);
1197
1195
  await use(async (taskPrompt) => {
1198
- await page.waitForLoadState("networkidle");
1199
- await agent.aiAction(taskPrompt);
1196
+ test.step(`aiAction - ${taskPrompt}`, async () => {
1197
+ await page.waitForLoadState("networkidle");
1198
+ await agent.aiAction(taskPrompt);
1199
+ });
1200
1200
  });
1201
1201
  updateDumpAnnotation(testInfo, agent.dumpDataString());
1202
1202
  },
1203
1203
  aiQuery: async ({ page }, use, testInfo) => {
1204
1204
  const agent = agentForPage(page, testInfo);
1205
1205
  await use(async (demand) => {
1206
- await page.waitForLoadState("networkidle");
1207
- const result = await agent.aiQuery(demand);
1208
- return result;
1206
+ return new Promise((resolve, reject) => {
1207
+ test.step(`aiQuery - ${JSON.stringify(demand)}`, async () => {
1208
+ await page.waitForLoadState("networkidle");
1209
+ const result = await agent.aiQuery(demand);
1210
+ resolve(result);
1211
+ });
1212
+ });
1209
1213
  });
1210
1214
  updateDumpAnnotation(testInfo, agent.dumpDataString());
1211
1215
  },
1212
1216
  aiAssert: async ({ page }, use, testInfo) => {
1213
1217
  const agent = agentForPage(page, testInfo);
1214
1218
  await use(async (assertion, errorMsg) => {
1215
- await page.waitForLoadState("networkidle");
1216
- await agent.aiAssert(assertion, errorMsg);
1219
+ return new Promise((resolve, reject) => {
1220
+ test.step(`aiAssert - ${assertion}`, async () => {
1221
+ await page.waitForLoadState("networkidle");
1222
+ await agent.aiAssert(assertion, errorMsg);
1223
+ resolve(null);
1224
+ });
1225
+ });
1217
1226
  });
1218
1227
  updateDumpAnnotation(testInfo, agent.dumpDataString());
1219
1228
  },
1220
1229
  aiWaitFor: async ({ page }, use, testInfo) => {
1221
1230
  const agent = agentForPage(page, testInfo);
1222
1231
  await use(async (assertion, opt) => {
1223
- await agent.aiWaitFor(assertion, opt);
1232
+ return new Promise((resolve, reject) => {
1233
+ test.step(`aiWaitFor - ${assertion}`, async () => {
1234
+ await agent.aiWaitFor(assertion, opt);
1235
+ resolve(null);
1236
+ });
1237
+ });
1224
1238
  });
1225
1239
  updateDumpAnnotation(testInfo, agent.dumpDataString());
1226
1240
  }
1227
1241
  };
1228
1242
  };
1243
+
1244
+ // src/debug/index.ts
1245
+ import { existsSync, mkdirSync, writeFileSync } from "fs";
1246
+ import path3 from "path";
1247
+
1248
+ // src/img/img.ts
1249
+ import assert3 from "assert";
1250
+ import { Buffer as Buffer2 } from "buffer";
1251
+ import sharp from "sharp";
1252
+ var createSvgOverlay = (elements, imageWidth, imageHeight) => {
1253
+ let svgContent = `<svg width="${imageWidth}" height="${imageHeight}" xmlns="http://www.w3.org/2000/svg">`;
1254
+ const colors = [
1255
+ { rect: "blue", text: "white" },
1256
+ { rect: "green", text: "white" }
1257
+ ];
1258
+ svgContent += "<defs>";
1259
+ elements.forEach((element, index) => {
1260
+ svgContent += `
1261
+ <clipPath id="clip${index}">
1262
+ <rect x="${element.x}" y="${element.y}" width="${element.width}" height="${element.height}" />
1263
+ </clipPath>
1264
+ `;
1265
+ });
1266
+ svgContent += "</defs>";
1267
+ elements.forEach((element, index) => {
1268
+ const textWidth = element.label.length * 8;
1269
+ const textHeight = 12;
1270
+ const rectWidth = textWidth + 5;
1271
+ const rectHeight = textHeight + 4;
1272
+ let rectX = element.x - rectWidth;
1273
+ let rectY = element.y + element.height / 2 - textHeight / 2 - 2;
1274
+ let textX = rectX + rectWidth / 2;
1275
+ let textY = rectY + rectHeight / 2 + 6;
1276
+ if (rectX < 0) {
1277
+ rectX = element.x;
1278
+ rectY = element.y - rectHeight;
1279
+ textX = rectX + rectWidth / 2;
1280
+ textY = rectY + rectHeight / 2 + 6;
1281
+ }
1282
+ const color = colors[index % colors.length];
1283
+ svgContent += `
1284
+ <rect x="${element.x}" y="${element.y}" width="${element.width}" height="${element.height}"
1285
+ style="fill:none;stroke:${color.rect};stroke-width:4" clip-path="url(#clip${index})" />
1286
+ <rect x="${rectX}" y="${rectY}" width="${rectWidth}" height="${rectHeight}" style="fill:${color.rect};" />
1287
+ <text x="${textX}" y="${textY}"
1288
+ text-anchor="middle" dominant-baseline="middle" style="fill:${color.text};font-size:12px;font-weight:bold;">
1289
+ ${element.label}
1290
+ </text>
1291
+ `;
1292
+ });
1293
+ svgContent += "</svg>";
1294
+ return Buffer2.from(svgContent);
1295
+ };
1296
+ var processImageElementInfo = async (options) => {
1297
+ const base64Image = options.inputImgBase64.split(";base64,").pop();
1298
+ assert3(base64Image, "base64Image is undefined");
1299
+ const imageBuffer = Buffer2.from(base64Image, "base64");
1300
+ const metadata = await sharp(imageBuffer).metadata();
1301
+ const { width, height } = metadata;
1302
+ if (width && height) {
1303
+ const svgOverlay = createSvgOverlay(
1304
+ options.elementsPositionInfo,
1305
+ width,
1306
+ height
1307
+ );
1308
+ const svgOverlayWithoutText = createSvgOverlay(
1309
+ options.elementsPositionInfoWithoutText,
1310
+ width,
1311
+ height
1312
+ );
1313
+ const compositeElementInfoImgBase64 = await sharp(imageBuffer).composite([{ input: svgOverlay, blend: "over" }]).toBuffer().then((data) => {
1314
+ return data.toString("base64");
1315
+ }).catch((err) => {
1316
+ throw err;
1317
+ });
1318
+ const compositeElementInfoImgWithoutTextBase64 = await sharp(imageBuffer).composite([{ input: svgOverlayWithoutText, blend: "over" }]).toBuffer().then((data) => {
1319
+ return data.toString("base64");
1320
+ }).catch((err) => {
1321
+ throw err;
1322
+ });
1323
+ return {
1324
+ compositeElementInfoImgBase64,
1325
+ compositeElementInfoImgWithoutTextBase64
1326
+ };
1327
+ }
1328
+ throw Error("Image processing failed because width or height is undefined");
1329
+ };
1330
+
1331
+ // src/img/util.ts
1332
+ async function getElementInfos(page) {
1333
+ const captureElementSnapshot = await getElementInfosFromPage(page);
1334
+ const elementsPositionInfo = captureElementSnapshot.map((elementInfo) => {
1335
+ return {
1336
+ label: elementInfo.indexId.toString(),
1337
+ x: elementInfo.rect.left,
1338
+ y: elementInfo.rect.top,
1339
+ width: elementInfo.rect.width,
1340
+ height: elementInfo.rect.height,
1341
+ attributes: elementInfo.attributes
1342
+ };
1343
+ });
1344
+ const elementsPositionInfoWithoutText = elementsPositionInfo.filter(
1345
+ (elementInfo) => {
1346
+ if (elementInfo.attributes.nodeType === "TEXT Node" /* TEXT */) {
1347
+ return false;
1348
+ }
1349
+ return true;
1350
+ }
1351
+ );
1352
+ return {
1353
+ elementsPositionInfo,
1354
+ captureElementSnapshot,
1355
+ elementsPositionInfoWithoutText
1356
+ };
1357
+ }
1358
+
1359
+ // src/debug/index.ts
1360
+ import { resizeImg, saveBase64Image } from "@midscene/core/image";
1361
+ async function generateExtractData(page, targetDir, saveImgType) {
1362
+ const buffer = await page.screenshot({
1363
+ encoding: "base64"
1364
+ });
1365
+ const inputImgBase64 = buffer.toString("base64");
1366
+ const {
1367
+ elementsPositionInfo,
1368
+ captureElementSnapshot,
1369
+ elementsPositionInfoWithoutText
1370
+ } = await getElementInfos(page);
1371
+ const inputImagePath = path3.join(targetDir, "input.png");
1372
+ const outputImagePath = path3.join(targetDir, "output.png");
1373
+ const outputWithoutTextImgPath = path3.join(
1374
+ targetDir,
1375
+ "output_without_text.png"
1376
+ );
1377
+ const resizeOutputImgPath = path3.join(targetDir, "resize-output.png");
1378
+ const snapshotJsonPath = path3.join(targetDir, "element-snapshot.json");
1379
+ const {
1380
+ compositeElementInfoImgBase64,
1381
+ compositeElementInfoImgWithoutTextBase64
1382
+ } = await processImageElementInfo({
1383
+ elementsPositionInfo,
1384
+ elementsPositionInfoWithoutText,
1385
+ inputImgBase64
1386
+ });
1387
+ const resizeImgBase64 = await resizeImg(inputImgBase64);
1388
+ if (!(saveImgType == null ? void 0 : saveImgType.disableSnapshot)) {
1389
+ writeFileSyncWithDir(
1390
+ snapshotJsonPath,
1391
+ JSON.stringify(captureElementSnapshot, null, 2)
1392
+ );
1393
+ }
1394
+ if (!(saveImgType == null ? void 0 : saveImgType.disableInputImage)) {
1395
+ await saveBase64Image({
1396
+ base64Data: inputImgBase64,
1397
+ outputPath: inputImagePath
1398
+ });
1399
+ }
1400
+ if (!(saveImgType == null ? void 0 : saveImgType.disableOutputImage)) {
1401
+ await saveBase64Image({
1402
+ base64Data: compositeElementInfoImgBase64,
1403
+ outputPath: outputImagePath
1404
+ });
1405
+ }
1406
+ if (!(saveImgType == null ? void 0 : saveImgType.disableOutputWithoutTextImg)) {
1407
+ await saveBase64Image({
1408
+ base64Data: compositeElementInfoImgWithoutTextBase64,
1409
+ outputPath: outputWithoutTextImgPath
1410
+ });
1411
+ }
1412
+ if (!(saveImgType == null ? void 0 : saveImgType.disableResizeOutputImg)) {
1413
+ await saveBase64Image({
1414
+ base64Data: resizeImgBase64,
1415
+ outputPath: resizeOutputImgPath
1416
+ });
1417
+ }
1418
+ }
1419
+ function ensureDirectoryExistence(filePath) {
1420
+ const dirname2 = path3.dirname(filePath);
1421
+ if (existsSync(dirname2)) {
1422
+ return;
1423
+ }
1424
+ ensureDirectoryExistence(dirname2);
1425
+ mkdirSync(dirname2);
1426
+ }
1427
+ function writeFileSyncWithDir(filePath, content, options = {}) {
1428
+ ensureDirectoryExistence(filePath);
1429
+ writeFileSync(filePath, content, options);
1430
+ }
1229
1431
  export {
1230
1432
  PlaywrightAiFixture,
1231
- PageAgent as PuppeteerAgent
1433
+ PageAgent as PuppeteerAgent,
1434
+ generateExtractData
1232
1435
  };
@@ -325,7 +325,6 @@ import assert from "assert";
325
325
  import fs, { readFileSync } from "fs";
326
326
  import path from "path";
327
327
  import {
328
- alignCoordByTrim,
329
328
  base64Encoded,
330
329
  imageInfoOfBase64
331
330
  } from "@midscene/core/image";
package/dist/lib/index.js CHANGED
@@ -346,7 +346,8 @@ var require_dayjs_min = __commonJS({
346
346
  var src_exports = {};
347
347
  __export(src_exports, {
348
348
  PlaywrightAiFixture: () => PlaywrightAiFixture,
349
- PuppeteerAgent: () => PageAgent
349
+ PuppeteerAgent: () => PageAgent,
350
+ generateExtractData: () => generateExtractData
350
351
  });
351
352
  module.exports = __toCommonJS(src_exports);
352
353
 
@@ -487,14 +488,6 @@ async function alignElements(screenshotBuffer, elements, page) {
487
488
  });
488
489
  for (const item of validElements) {
489
490
  const { rect, id, content, attributes, locator } = item;
490
- const aligned = await (0, import_image.alignCoordByTrim)(screenshotBuffer, rect);
491
- if (aligned.width < 0)
492
- continue;
493
- item.rect = aligned;
494
- item.center = [
495
- Math.round(aligned.left + aligned.width / 2),
496
- Math.round(aligned.top + aligned.height / 2)
497
- ];
498
491
  textsAligned.push(
499
492
  new WebElementInfo({
500
493
  rect,
@@ -1025,7 +1018,7 @@ ${errorTask == null ? void 0 : errorTask.errorStack}`);
1025
1018
  this.writeOutActionDumps();
1026
1019
  if (!(output == null ? void 0 : output.pass)) {
1027
1020
  const errMsg = msg || `Assertion failed: ${assertion}`;
1028
- const reasonMsg = `Reason: ${(output == null ? void 0 : output.thought) || "(no_reason)"}`;
1021
+ const reasonMsg = `Reason: ${output == null ? void 0 : output.thought} || (no_reason)`;
1029
1022
  throw new Error(`${errMsg}
1030
1023
  ${reasonMsg}`);
1031
1024
  }
@@ -1060,6 +1053,9 @@ ${errorTask == null ? void 0 : errorTask.errorStack}`);
1060
1053
  }
1061
1054
  };
1062
1055
 
1056
+ // src/playwright/index.ts
1057
+ var import_test = require("@playwright/test");
1058
+
1063
1059
  // src/playwright/cache.ts
1064
1060
  var import_node_fs2 = __toESM(require("fs"));
1065
1061
  var import_node_path2 = __toESM(require("path"));
@@ -1161,14 +1157,14 @@ var PlaywrightAiFixture = () => {
1161
1157
  }
1162
1158
  return pageAgentMap[idForPage];
1163
1159
  };
1164
- const updateDumpAnnotation = (test, dump) => {
1165
- const currentAnnotation = test.annotations.find((item) => {
1160
+ const updateDumpAnnotation = (test2, dump) => {
1161
+ const currentAnnotation = test2.annotations.find((item) => {
1166
1162
  return item.type === midsceneDumpAnnotationId;
1167
1163
  });
1168
1164
  if (currentAnnotation) {
1169
1165
  currentAnnotation.description = dump;
1170
1166
  } else {
1171
- test.annotations.push({
1167
+ test2.annotations.push({
1172
1168
  type: midsceneDumpAnnotationId,
1173
1169
  description: dump
1174
1170
  });
@@ -1180,10 +1176,14 @@ var PlaywrightAiFixture = () => {
1180
1176
  const agent = agentForPage(page, testInfo);
1181
1177
  await use(
1182
1178
  async (taskPrompt, opts) => {
1183
- await page.waitForLoadState("networkidle");
1184
- const actionType = (opts == null ? void 0 : opts.type) || "action";
1185
- const result = await agent.ai(taskPrompt, actionType);
1186
- return result;
1179
+ return new Promise((resolve, reject) => {
1180
+ import_test.test.step(`ai - ${taskPrompt}`, async () => {
1181
+ await page.waitForLoadState("networkidle");
1182
+ const actionType = (opts == null ? void 0 : opts.type) || "action";
1183
+ const result = await agent.ai(taskPrompt, actionType);
1184
+ resolve(result);
1185
+ });
1186
+ });
1187
1187
  }
1188
1188
  );
1189
1189
  const taskCacheJson = agent.taskExecutor.taskCache.generateTaskCache();
@@ -1194,39 +1194,244 @@ var PlaywrightAiFixture = () => {
1194
1194
  const { taskFile, taskTitle } = groupAndCaseForTest(testInfo);
1195
1195
  const agent = agentForPage(page, testInfo);
1196
1196
  await use(async (taskPrompt) => {
1197
- await page.waitForLoadState("networkidle");
1198
- await agent.aiAction(taskPrompt);
1197
+ import_test.test.step(`aiAction - ${taskPrompt}`, async () => {
1198
+ await page.waitForLoadState("networkidle");
1199
+ await agent.aiAction(taskPrompt);
1200
+ });
1199
1201
  });
1200
1202
  updateDumpAnnotation(testInfo, agent.dumpDataString());
1201
1203
  },
1202
1204
  aiQuery: async ({ page }, use, testInfo) => {
1203
1205
  const agent = agentForPage(page, testInfo);
1204
1206
  await use(async (demand) => {
1205
- await page.waitForLoadState("networkidle");
1206
- const result = await agent.aiQuery(demand);
1207
- return result;
1207
+ return new Promise((resolve, reject) => {
1208
+ import_test.test.step(`aiQuery - ${JSON.stringify(demand)}`, async () => {
1209
+ await page.waitForLoadState("networkidle");
1210
+ const result = await agent.aiQuery(demand);
1211
+ resolve(result);
1212
+ });
1213
+ });
1208
1214
  });
1209
1215
  updateDumpAnnotation(testInfo, agent.dumpDataString());
1210
1216
  },
1211
1217
  aiAssert: async ({ page }, use, testInfo) => {
1212
1218
  const agent = agentForPage(page, testInfo);
1213
1219
  await use(async (assertion, errorMsg) => {
1214
- await page.waitForLoadState("networkidle");
1215
- await agent.aiAssert(assertion, errorMsg);
1220
+ return new Promise((resolve, reject) => {
1221
+ import_test.test.step(`aiAssert - ${assertion}`, async () => {
1222
+ await page.waitForLoadState("networkidle");
1223
+ await agent.aiAssert(assertion, errorMsg);
1224
+ resolve(null);
1225
+ });
1226
+ });
1216
1227
  });
1217
1228
  updateDumpAnnotation(testInfo, agent.dumpDataString());
1218
1229
  },
1219
1230
  aiWaitFor: async ({ page }, use, testInfo) => {
1220
1231
  const agent = agentForPage(page, testInfo);
1221
1232
  await use(async (assertion, opt) => {
1222
- await agent.aiWaitFor(assertion, opt);
1233
+ return new Promise((resolve, reject) => {
1234
+ import_test.test.step(`aiWaitFor - ${assertion}`, async () => {
1235
+ await agent.aiWaitFor(assertion, opt);
1236
+ resolve(null);
1237
+ });
1238
+ });
1223
1239
  });
1224
1240
  updateDumpAnnotation(testInfo, agent.dumpDataString());
1225
1241
  }
1226
1242
  };
1227
1243
  };
1244
+
1245
+ // src/debug/index.ts
1246
+ var import_node_fs3 = require("fs");
1247
+ var import_node_path3 = __toESM(require("path"));
1248
+
1249
+ // src/img/img.ts
1250
+ var import_node_assert3 = __toESM(require("assert"));
1251
+ var import_node_buffer = require("buffer");
1252
+ var import_sharp = __toESM(require("sharp"));
1253
+ var createSvgOverlay = (elements, imageWidth, imageHeight) => {
1254
+ let svgContent = `<svg width="${imageWidth}" height="${imageHeight}" xmlns="http://www.w3.org/2000/svg">`;
1255
+ const colors = [
1256
+ { rect: "blue", text: "white" },
1257
+ { rect: "green", text: "white" }
1258
+ ];
1259
+ svgContent += "<defs>";
1260
+ elements.forEach((element, index) => {
1261
+ svgContent += `
1262
+ <clipPath id="clip${index}">
1263
+ <rect x="${element.x}" y="${element.y}" width="${element.width}" height="${element.height}" />
1264
+ </clipPath>
1265
+ `;
1266
+ });
1267
+ svgContent += "</defs>";
1268
+ elements.forEach((element, index) => {
1269
+ const textWidth = element.label.length * 8;
1270
+ const textHeight = 12;
1271
+ const rectWidth = textWidth + 5;
1272
+ const rectHeight = textHeight + 4;
1273
+ let rectX = element.x - rectWidth;
1274
+ let rectY = element.y + element.height / 2 - textHeight / 2 - 2;
1275
+ let textX = rectX + rectWidth / 2;
1276
+ let textY = rectY + rectHeight / 2 + 6;
1277
+ if (rectX < 0) {
1278
+ rectX = element.x;
1279
+ rectY = element.y - rectHeight;
1280
+ textX = rectX + rectWidth / 2;
1281
+ textY = rectY + rectHeight / 2 + 6;
1282
+ }
1283
+ const color = colors[index % colors.length];
1284
+ svgContent += `
1285
+ <rect x="${element.x}" y="${element.y}" width="${element.width}" height="${element.height}"
1286
+ style="fill:none;stroke:${color.rect};stroke-width:4" clip-path="url(#clip${index})" />
1287
+ <rect x="${rectX}" y="${rectY}" width="${rectWidth}" height="${rectHeight}" style="fill:${color.rect};" />
1288
+ <text x="${textX}" y="${textY}"
1289
+ text-anchor="middle" dominant-baseline="middle" style="fill:${color.text};font-size:12px;font-weight:bold;">
1290
+ ${element.label}
1291
+ </text>
1292
+ `;
1293
+ });
1294
+ svgContent += "</svg>";
1295
+ return import_node_buffer.Buffer.from(svgContent);
1296
+ };
1297
+ var processImageElementInfo = async (options) => {
1298
+ const base64Image = options.inputImgBase64.split(";base64,").pop();
1299
+ (0, import_node_assert3.default)(base64Image, "base64Image is undefined");
1300
+ const imageBuffer = import_node_buffer.Buffer.from(base64Image, "base64");
1301
+ const metadata = await (0, import_sharp.default)(imageBuffer).metadata();
1302
+ const { width, height } = metadata;
1303
+ if (width && height) {
1304
+ const svgOverlay = createSvgOverlay(
1305
+ options.elementsPositionInfo,
1306
+ width,
1307
+ height
1308
+ );
1309
+ const svgOverlayWithoutText = createSvgOverlay(
1310
+ options.elementsPositionInfoWithoutText,
1311
+ width,
1312
+ height
1313
+ );
1314
+ const compositeElementInfoImgBase64 = await (0, import_sharp.default)(imageBuffer).composite([{ input: svgOverlay, blend: "over" }]).toBuffer().then((data) => {
1315
+ return data.toString("base64");
1316
+ }).catch((err) => {
1317
+ throw err;
1318
+ });
1319
+ const compositeElementInfoImgWithoutTextBase64 = await (0, import_sharp.default)(imageBuffer).composite([{ input: svgOverlayWithoutText, blend: "over" }]).toBuffer().then((data) => {
1320
+ return data.toString("base64");
1321
+ }).catch((err) => {
1322
+ throw err;
1323
+ });
1324
+ return {
1325
+ compositeElementInfoImgBase64,
1326
+ compositeElementInfoImgWithoutTextBase64
1327
+ };
1328
+ }
1329
+ throw Error("Image processing failed because width or height is undefined");
1330
+ };
1331
+
1332
+ // src/img/util.ts
1333
+ async function getElementInfos(page) {
1334
+ const captureElementSnapshot = await getElementInfosFromPage(page);
1335
+ const elementsPositionInfo = captureElementSnapshot.map((elementInfo) => {
1336
+ return {
1337
+ label: elementInfo.indexId.toString(),
1338
+ x: elementInfo.rect.left,
1339
+ y: elementInfo.rect.top,
1340
+ width: elementInfo.rect.width,
1341
+ height: elementInfo.rect.height,
1342
+ attributes: elementInfo.attributes
1343
+ };
1344
+ });
1345
+ const elementsPositionInfoWithoutText = elementsPositionInfo.filter(
1346
+ (elementInfo) => {
1347
+ if (elementInfo.attributes.nodeType === "TEXT Node" /* TEXT */) {
1348
+ return false;
1349
+ }
1350
+ return true;
1351
+ }
1352
+ );
1353
+ return {
1354
+ elementsPositionInfo,
1355
+ captureElementSnapshot,
1356
+ elementsPositionInfoWithoutText
1357
+ };
1358
+ }
1359
+
1360
+ // src/debug/index.ts
1361
+ var import_image3 = require("@midscene/core/image");
1362
+ async function generateExtractData(page, targetDir, saveImgType) {
1363
+ const buffer = await page.screenshot({
1364
+ encoding: "base64"
1365
+ });
1366
+ const inputImgBase64 = buffer.toString("base64");
1367
+ const {
1368
+ elementsPositionInfo,
1369
+ captureElementSnapshot,
1370
+ elementsPositionInfoWithoutText
1371
+ } = await getElementInfos(page);
1372
+ const inputImagePath = import_node_path3.default.join(targetDir, "input.png");
1373
+ const outputImagePath = import_node_path3.default.join(targetDir, "output.png");
1374
+ const outputWithoutTextImgPath = import_node_path3.default.join(
1375
+ targetDir,
1376
+ "output_without_text.png"
1377
+ );
1378
+ const resizeOutputImgPath = import_node_path3.default.join(targetDir, "resize-output.png");
1379
+ const snapshotJsonPath = import_node_path3.default.join(targetDir, "element-snapshot.json");
1380
+ const {
1381
+ compositeElementInfoImgBase64,
1382
+ compositeElementInfoImgWithoutTextBase64
1383
+ } = await processImageElementInfo({
1384
+ elementsPositionInfo,
1385
+ elementsPositionInfoWithoutText,
1386
+ inputImgBase64
1387
+ });
1388
+ const resizeImgBase64 = await (0, import_image3.resizeImg)(inputImgBase64);
1389
+ if (!(saveImgType == null ? void 0 : saveImgType.disableSnapshot)) {
1390
+ writeFileSyncWithDir(
1391
+ snapshotJsonPath,
1392
+ JSON.stringify(captureElementSnapshot, null, 2)
1393
+ );
1394
+ }
1395
+ if (!(saveImgType == null ? void 0 : saveImgType.disableInputImage)) {
1396
+ await (0, import_image3.saveBase64Image)({
1397
+ base64Data: inputImgBase64,
1398
+ outputPath: inputImagePath
1399
+ });
1400
+ }
1401
+ if (!(saveImgType == null ? void 0 : saveImgType.disableOutputImage)) {
1402
+ await (0, import_image3.saveBase64Image)({
1403
+ base64Data: compositeElementInfoImgBase64,
1404
+ outputPath: outputImagePath
1405
+ });
1406
+ }
1407
+ if (!(saveImgType == null ? void 0 : saveImgType.disableOutputWithoutTextImg)) {
1408
+ await (0, import_image3.saveBase64Image)({
1409
+ base64Data: compositeElementInfoImgWithoutTextBase64,
1410
+ outputPath: outputWithoutTextImgPath
1411
+ });
1412
+ }
1413
+ if (!(saveImgType == null ? void 0 : saveImgType.disableResizeOutputImg)) {
1414
+ await (0, import_image3.saveBase64Image)({
1415
+ base64Data: resizeImgBase64,
1416
+ outputPath: resizeOutputImgPath
1417
+ });
1418
+ }
1419
+ }
1420
+ function ensureDirectoryExistence(filePath) {
1421
+ const dirname2 = import_node_path3.default.dirname(filePath);
1422
+ if ((0, import_node_fs3.existsSync)(dirname2)) {
1423
+ return;
1424
+ }
1425
+ ensureDirectoryExistence(dirname2);
1426
+ (0, import_node_fs3.mkdirSync)(dirname2);
1427
+ }
1428
+ function writeFileSyncWithDir(filePath, content, options = {}) {
1429
+ ensureDirectoryExistence(filePath);
1430
+ (0, import_node_fs3.writeFileSync)(filePath, content, options);
1431
+ }
1228
1432
  // Annotate the CommonJS export names for ESM import in node:
1229
1433
  0 && (module.exports = {
1230
1434
  PlaywrightAiFixture,
1231
- PuppeteerAgent
1435
+ PuppeteerAgent,
1436
+ generateExtractData
1232
1437
  });
@@ -581,8 +581,8 @@ var midscene_element_inspector = (() => {
581
581
  });
582
582
 
583
583
  // src/extractor/dom-util.ts
584
- function isFormElement(node) {
585
- return node instanceof HTMLElement && (node.tagName.toLowerCase() === "input" || node.tagName.toLowerCase() === "textarea" || node.tagName.toLowerCase() === "label" || node.tagName.toLowerCase() === "select" || node.tagName.toLowerCase() === "option");
584
+ function isInputElement(node) {
585
+ return node instanceof HTMLElement && (node.tagName.toLowerCase() === "input" || node.tagName.toLowerCase() === "textarea");
586
586
  }
587
587
  function isButtonElement(node) {
588
588
  return node instanceof HTMLElement && node.tagName.toLowerCase() === "button";
@@ -644,23 +644,23 @@ var midscene_element_inspector = (() => {
644
644
  }
645
645
  function visibleRect(el) {
646
646
  if (!el) {
647
- logger("Element is not in the DOM hierarchy");
647
+ logger(el, "Element is not in the DOM hierarchy");
648
648
  return false;
649
649
  }
650
650
  if (!(el instanceof HTMLElement) && el.nodeType !== Node.TEXT_NODE) {
651
- logger("Element is not in the DOM hierarchy");
651
+ logger(el, "Element is not in the DOM hierarchy");
652
652
  return false;
653
653
  }
654
654
  if (el instanceof HTMLElement) {
655
655
  const style = window.getComputedStyle(el);
656
656
  if (style.display === "none" || style.visibility === "hidden" || style.opacity === "0" && el.tagName !== "INPUT") {
657
- logger("Element is hidden");
657
+ logger(el, "Element is hidden");
658
658
  return false;
659
659
  }
660
660
  }
661
661
  const rect = getRect(el);
662
662
  if (rect.width === 0 && rect.height === 0) {
663
- logger("Element has no size");
663
+ logger(el, "Element has no size");
664
664
  return false;
665
665
  }
666
666
  const scrollLeft = window.pageXOffset || document.documentElement.scrollLeft;
@@ -669,8 +669,13 @@ var midscene_element_inspector = (() => {
669
669
  const viewportHeight = window.innerHeight || document.documentElement.clientHeight;
670
670
  const isPartiallyInViewport = rect.right > 0 && rect.bottom > 0 && rect.left < viewportWidth && rect.top < viewportHeight;
671
671
  if (!isPartiallyInViewport) {
672
- logger("Element is completely outside the viewport");
673
- logger(rect, viewportHeight, viewportWidth, scrollTop, scrollLeft);
672
+ logger(el, "Element is completely outside the viewport", {
673
+ rect,
674
+ viewportHeight,
675
+ viewportWidth,
676
+ scrollTop,
677
+ scrollLeft
678
+ });
674
679
  return false;
675
680
  }
676
681
  let parent = el;
@@ -739,7 +744,6 @@ var midscene_element_inspector = (() => {
739
744
  }
740
745
  const shouldContinue = collectElementInfo(node);
741
746
  if (!shouldContinue) {
742
- logger("should NOT continue for node", node);
743
747
  return;
744
748
  }
745
749
  for (let i = 0; i < node.childNodes.length; i++) {
@@ -753,29 +757,22 @@ var midscene_element_inspector = (() => {
753
757
  logger("collectElementInfo", node, node.nodeName, rect);
754
758
  if (!rect) {
755
759
  logger("Element is not visible", node);
756
- return;
760
+ return true;
757
761
  }
758
- if (isFormElement(node)) {
762
+ if (isInputElement(node)) {
759
763
  const attributes = getNodeAttributes(node);
760
764
  const nodeHashId = generateHash(attributes.placeholder, rect);
761
765
  const selector = setDataForNode(node, nodeHashId);
762
- let valueContent = attributes.value || attributes.placeholder || node.textContent || "";
763
- const tagName = node.tagName.toLowerCase();
764
- if (node.tagName.toLowerCase() === "select") {
765
- const selectedOption = node.options[node.selectedIndex];
766
- valueContent = selectedOption.textContent || "";
767
- }
768
766
  elementInfoArray.push({
769
767
  id: nodeHashId,
770
768
  indexId: generateId(nodeIndex++),
771
769
  nodeHashId,
772
770
  locator: selector,
773
- nodeType: "FORM_ITEM Node" /* FORM_ITEM */,
771
+ nodeType: "INPUT Node" /* INPUT */,
774
772
  attributes: __spreadProps(__spreadValues({}, attributes), {
775
- htmlTagName: `<${tagName}>`,
776
- nodeType: "FORM_ITEM Node" /* FORM_ITEM */
773
+ nodeType: "INPUT Node" /* INPUT */
777
774
  }),
778
- content: valueContent.trim(),
775
+ content: attributes.placeholder || "",
779
776
  rect,
780
777
  center: [
781
778
  Math.round(rect.left + rect.width / 2),
@@ -783,8 +780,6 @@ var midscene_element_inspector = (() => {
783
780
  ],
784
781
  htmlNode: debugMode2 ? node : null
785
782
  });
786
- if (tagName === "label")
787
- return true;
788
783
  return;
789
784
  }
790
785
  if (isButtonElement(node)) {
@@ -841,10 +836,6 @@ var midscene_element_inspector = (() => {
841
836
  return;
842
837
  }
843
838
  const attributes = getNodeAttributes(node);
844
- const attributeKeys = Object.keys(attributes);
845
- if (!text.trim() && attributeKeys.length === 0) {
846
- return;
847
- }
848
839
  const nodeHashId = generateHash(text, rect);
849
840
  const selector = setDataForNode(node, nodeHashId);
850
841
  elementInfoArray.push({
@@ -570,8 +570,8 @@ var midscene_element_inspector = (() => {
570
570
  });
571
571
 
572
572
  // src/extractor/dom-util.ts
573
- function isFormElement(node) {
574
- return node instanceof HTMLElement && (node.tagName.toLowerCase() === "input" || node.tagName.toLowerCase() === "textarea" || node.tagName.toLowerCase() === "label" || node.tagName.toLowerCase() === "select" || node.tagName.toLowerCase() === "option");
573
+ function isInputElement(node) {
574
+ return node instanceof HTMLElement && (node.tagName.toLowerCase() === "input" || node.tagName.toLowerCase() === "textarea");
575
575
  }
576
576
  function isButtonElement(node) {
577
577
  return node instanceof HTMLElement && node.tagName.toLowerCase() === "button";
@@ -633,23 +633,23 @@ var midscene_element_inspector = (() => {
633
633
  }
634
634
  function visibleRect(el) {
635
635
  if (!el) {
636
- logger("Element is not in the DOM hierarchy");
636
+ logger(el, "Element is not in the DOM hierarchy");
637
637
  return false;
638
638
  }
639
639
  if (!(el instanceof HTMLElement) && el.nodeType !== Node.TEXT_NODE) {
640
- logger("Element is not in the DOM hierarchy");
640
+ logger(el, "Element is not in the DOM hierarchy");
641
641
  return false;
642
642
  }
643
643
  if (el instanceof HTMLElement) {
644
644
  const style = window.getComputedStyle(el);
645
645
  if (style.display === "none" || style.visibility === "hidden" || style.opacity === "0" && el.tagName !== "INPUT") {
646
- logger("Element is hidden");
646
+ logger(el, "Element is hidden");
647
647
  return false;
648
648
  }
649
649
  }
650
650
  const rect = getRect(el);
651
651
  if (rect.width === 0 && rect.height === 0) {
652
- logger("Element has no size");
652
+ logger(el, "Element has no size");
653
653
  return false;
654
654
  }
655
655
  const scrollLeft = window.pageXOffset || document.documentElement.scrollLeft;
@@ -658,8 +658,13 @@ var midscene_element_inspector = (() => {
658
658
  const viewportHeight = window.innerHeight || document.documentElement.clientHeight;
659
659
  const isPartiallyInViewport = rect.right > 0 && rect.bottom > 0 && rect.left < viewportWidth && rect.top < viewportHeight;
660
660
  if (!isPartiallyInViewport) {
661
- logger("Element is completely outside the viewport");
662
- logger(rect, viewportHeight, viewportWidth, scrollTop, scrollLeft);
661
+ logger(el, "Element is completely outside the viewport", {
662
+ rect,
663
+ viewportHeight,
664
+ viewportWidth,
665
+ scrollTop,
666
+ scrollLeft
667
+ });
663
668
  return false;
664
669
  }
665
670
  let parent = el;
@@ -728,7 +733,6 @@ var midscene_element_inspector = (() => {
728
733
  }
729
734
  const shouldContinue = collectElementInfo(node);
730
735
  if (!shouldContinue) {
731
- logger("should NOT continue for node", node);
732
736
  return;
733
737
  }
734
738
  for (let i = 0; i < node.childNodes.length; i++) {
@@ -742,29 +746,22 @@ var midscene_element_inspector = (() => {
742
746
  logger("collectElementInfo", node, node.nodeName, rect);
743
747
  if (!rect) {
744
748
  logger("Element is not visible", node);
745
- return;
749
+ return true;
746
750
  }
747
- if (isFormElement(node)) {
751
+ if (isInputElement(node)) {
748
752
  const attributes = getNodeAttributes(node);
749
753
  const nodeHashId = generateHash(attributes.placeholder, rect);
750
754
  const selector = setDataForNode(node, nodeHashId);
751
- let valueContent = attributes.value || attributes.placeholder || node.textContent || "";
752
- const tagName = node.tagName.toLowerCase();
753
- if (node.tagName.toLowerCase() === "select") {
754
- const selectedOption = node.options[node.selectedIndex];
755
- valueContent = selectedOption.textContent || "";
756
- }
757
755
  elementInfoArray.push({
758
756
  id: nodeHashId,
759
757
  indexId: generateId(nodeIndex++),
760
758
  nodeHashId,
761
759
  locator: selector,
762
- nodeType: "FORM_ITEM Node" /* FORM_ITEM */,
760
+ nodeType: "INPUT Node" /* INPUT */,
763
761
  attributes: __spreadProps(__spreadValues({}, attributes), {
764
- htmlTagName: `<${tagName}>`,
765
- nodeType: "FORM_ITEM Node" /* FORM_ITEM */
762
+ nodeType: "INPUT Node" /* INPUT */
766
763
  }),
767
- content: valueContent.trim(),
764
+ content: attributes.placeholder || "",
768
765
  rect,
769
766
  center: [
770
767
  Math.round(rect.left + rect.width / 2),
@@ -772,8 +769,6 @@ var midscene_element_inspector = (() => {
772
769
  ],
773
770
  htmlNode: debugMode2 ? node : null
774
771
  });
775
- if (tagName === "label")
776
- return true;
777
772
  return;
778
773
  }
779
774
  if (isButtonElement(node)) {
@@ -830,10 +825,6 @@ var midscene_element_inspector = (() => {
830
825
  return;
831
826
  }
832
827
  const attributes = getNodeAttributes(node);
833
- const attributeKeys = Object.keys(attributes);
834
- if (!text.trim() && attributeKeys.length === 0) {
835
- return;
836
- }
837
828
  const nodeHashId = generateHash(text, rect);
838
829
  const selector = setDataForNode(node, nodeHashId);
839
830
  elementInfoArray.push({
@@ -1,5 +1,5 @@
1
1
  declare enum NodeType {
2
- FORM_ITEM = "FORM_ITEM Node",
2
+ INPUT = "INPUT Node",
3
3
  BUTTON = "BUTTON Node",
4
4
  IMG = "IMG Node",
5
5
  TEXT = "TEXT Node"
@@ -7,7 +7,7 @@ import Insight, { BaseElement, Rect, UIContext, PlanningAction, AIElementParseRe
7
7
  type WebPage = Page | Page$1;
8
8
 
9
9
  declare enum NodeType {
10
- FORM_ITEM = "FORM_ITEM Node",
10
+ INPUT = "INPUT Node",
11
11
  BUTTON = "BUTTON Node",
12
12
  IMG = "IMG Node",
13
13
  TEXT = "TEXT Node"
@@ -180,4 +180,12 @@ declare class PageAgent {
180
180
  ai(taskPrompt: string, type?: string): Promise<any>;
181
181
  }
182
182
 
183
- export { type PlayWrightAiFixtureType, PlaywrightAiFixture, PageAgent as PuppeteerAgent };
183
+ declare function generateExtractData(page: WebPage, targetDir: string, saveImgType?: {
184
+ disableInputImage: boolean;
185
+ disableOutputImage: boolean;
186
+ disableOutputWithoutTextImg: boolean;
187
+ disableResizeOutputImg: boolean;
188
+ disableSnapshot: boolean;
189
+ }): Promise<void>;
190
+
191
+ export { type PlayWrightAiFixtureType, PlaywrightAiFixture, PageAgent as PuppeteerAgent, generateExtractData };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/web",
3
3
  "description": "Web integration for Midscene.js",
4
- "version": "0.3.1-beta-20240821105917.0",
4
+ "version": "0.3.2",
5
5
  "jsnext:source": "./src/index.ts",
6
6
  "main": "./dist/lib/index.js",
7
7
  "module": "./dist/es/index.js",
@@ -52,7 +52,7 @@
52
52
  "openai": "4.47.1",
53
53
  "sharp": "0.33.3",
54
54
  "inquirer": "10.1.5",
55
- "@midscene/core": "0.3.1-beta-20240821105917.0"
55
+ "@midscene/core": "0.3.2"
56
56
  },
57
57
  "devDependencies": {
58
58
  "@modern-js/module-tools": "^2.56.1",
@@ -70,7 +70,7 @@
70
70
  "peerDependencies": {
71
71
  "@playwright/test": "^1.44.1",
72
72
  "playwright": "^1.44.1",
73
- "puppeteer": "^23.0.2"
73
+ "puppeteer": ">=20.0.0"
74
74
  },
75
75
  "peerDependenciesMeta": {
76
76
  "@playwright/test": {
@@ -100,8 +100,9 @@
100
100
  "new": "modern new",
101
101
  "upgrade": "modern upgrade",
102
102
  "e2e": "playwright test --config=playwright.config.ts",
103
+ "e2e:report": "MIDSCENE_REPORT=true playwright test --config=playwright.config.ts",
103
104
  "e2e:cache": "MIDSCENE_CACHE=true playwright test --config=playwright.config.ts",
104
105
  "e2e:ui": "playwright test --config=playwright.config.ts --ui",
105
- "e2e:ui-cache": "MIDSCENE_CACHE=true playwright test --config=playwright.config.ts --ui"
106
+ "e2e:ui:cache": "MIDSCENE_CACHE=true playwright test --config=playwright.config.ts --ui"
106
107
  }
107
108
  }