@midscene/web 0.5.2-beta-20241010035503.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/es/appium.js CHANGED
@@ -894,8 +894,13 @@ import assert from "assert";
894
894
  import { randomUUID } from "crypto";
895
895
  import { readFileSync } from "fs";
896
896
  import path from "path";
897
+ import { NodeType } from "@midscene/shared/constants";
897
898
  import { findNearestPackageJson } from "@midscene/shared/fs";
898
- import { base64Encoded, imageInfoOfBase64 } from "@midscene/shared/img";
899
+ import {
900
+ base64Encoded,
901
+ imageInfoOfBase64
902
+ } from "@midscene/shared/img";
903
+ import { compositeElementInfoImg } from "@midscene/shared/img";
899
904
 
900
905
  // src/web-element.ts
901
906
  var WebElementInfo = class {
@@ -905,7 +910,8 @@ var WebElementInfo = class {
905
910
  page,
906
911
  locator,
907
912
  id,
908
- attributes
913
+ attributes,
914
+ indexId
909
915
  }) {
910
916
  this.content = content;
911
917
  this.rect = rect;
@@ -917,6 +923,7 @@ var WebElementInfo = class {
917
923
  this.locator = locator;
918
924
  this.id = id;
919
925
  this.attributes = attributes;
926
+ this.indexId = indexId;
920
927
  }
921
928
  };
922
929
 
@@ -925,30 +932,35 @@ async function parseContextFromWebPage(page, _opt) {
925
932
  assert(page, "page is required");
926
933
  const url = page.url();
927
934
  const file = await page.screenshot();
928
- const screenshotBuffer = readFileSync(file);
929
935
  const screenshotBase64 = base64Encoded(file);
930
936
  const captureElementSnapshot = await page.getElementInfos();
931
- const elementsInfo = await alignElements(
932
- screenshotBuffer,
933
- captureElementSnapshot,
934
- page
935
- );
937
+ const elementsInfo = await alignElements(captureElementSnapshot, page);
938
+ const elementsPositionInfoWithoutText = elementsInfo.filter((elementInfo) => {
939
+ if (elementInfo.attributes.nodeType === NodeType.TEXT) {
940
+ return false;
941
+ }
942
+ return true;
943
+ });
936
944
  const size = await imageInfoOfBase64(screenshotBase64);
945
+ const screenshotBase64WithElementInfos = await compositeElementInfoImg({
946
+ inputImgBase64: screenshotBase64.split(";base64,").pop(),
947
+ elementsPositionInfo: elementsPositionInfoWithoutText
948
+ });
937
949
  return {
938
950
  content: elementsInfo,
939
951
  size,
940
- screenshotBase64,
952
+ screenshotBase64: `data:image/png;base64,${screenshotBase64WithElementInfos}`,
941
953
  url
942
954
  };
943
955
  }
944
956
  var sizeThreshold = 3;
945
- async function alignElements(screenshotBuffer, elements, page) {
957
+ async function alignElements(elements, page) {
946
958
  const validElements = elements.filter((item) => {
947
959
  return item.rect.height >= sizeThreshold && item.rect.width >= sizeThreshold;
948
960
  });
949
961
  const textsAligned = [];
950
962
  for (const item of validElements) {
951
- const { rect, id, content, attributes, locator } = item;
963
+ const { rect, id, content, attributes, locator, indexId } = item;
952
964
  textsAligned.push(
953
965
  new WebElementInfo({
954
966
  rect,
@@ -956,7 +968,8 @@ async function alignElements(screenshotBuffer, elements, page) {
956
968
  id,
957
969
  content,
958
970
  attributes,
959
- page
971
+ page,
972
+ indexId
960
973
  })
961
974
  );
962
975
  }
@@ -1510,13 +1523,13 @@ var PageTaskExecutor = class {
1510
1523
  const assertTask = await this.convertPlanToExecutable([assertPlan]);
1511
1524
  await taskExecutor.append(this.wrapExecutorWithScreenshot(assertTask[0]));
1512
1525
  const output = await taskExecutor.flush();
1513
- if (output.pass) {
1526
+ if (output == null ? void 0 : output.pass) {
1514
1527
  return {
1515
1528
  output: void 0,
1516
1529
  executor: taskExecutor
1517
1530
  };
1518
1531
  }
1519
- errorThought = output.thought;
1532
+ errorThought = (output == null ? void 0 : output.thought) || "unknown error";
1520
1533
  const now = Date.now();
1521
1534
  if (now - startTime < checkIntervalMs) {
1522
1535
  const timeRemaining = checkIntervalMs - (now - startTime);
@@ -1666,7 +1679,7 @@ import { DOMParser } from "@xmldom/xmldom";
1666
1679
  import {
1667
1680
  CONTAINER_MINI_HEIGHT,
1668
1681
  CONTAINER_MINI_WIDTH,
1669
- NodeType
1682
+ NodeType as NodeType2
1670
1683
  } from "@midscene/shared/constants";
1671
1684
 
1672
1685
  // src/extractor/util.ts
@@ -1684,13 +1697,10 @@ function midsceneGenerateHash(content, rect) {
1684
1697
  const hashHex = (0, import_js_sha256.default)(combined);
1685
1698
  return hashHex.slice(0, 10);
1686
1699
  }
1687
- function generateId(numberId) {
1688
- return `${numberId}`;
1689
- }
1690
1700
 
1691
1701
  // src/extractor/constants.ts
1692
1702
  import {
1693
- NodeType as NodeType2,
1703
+ NodeType as NodeType3,
1694
1704
  TEXT_MAX_SIZE,
1695
1705
  TEXT_SIZE_THRESHOLD
1696
1706
  } from "@midscene/shared/constants";
@@ -1802,35 +1812,35 @@ function extractTextWithPosition2(initNode) {
1802
1812
  let nodeType;
1803
1813
  switch (node.nodeName.toUpperCase()) {
1804
1814
  case "TEXT":
1805
- nodeType = NodeType2.TEXT;
1815
+ nodeType = NodeType3.TEXT;
1806
1816
  break;
1807
1817
  case "IMAGE":
1808
- nodeType = NodeType2.IMG;
1818
+ nodeType = NodeType3.IMG;
1809
1819
  break;
1810
1820
  case "BUTTON":
1811
- nodeType = NodeType2.BUTTON;
1821
+ nodeType = NodeType3.BUTTON;
1812
1822
  break;
1813
1823
  case "SEARCHINPUT":
1814
1824
  case "INPUT":
1815
- nodeType = NodeType2.FORM_ITEM;
1825
+ nodeType = NodeType3.FORM_ITEM;
1816
1826
  break;
1817
1827
  case "NAV":
1818
1828
  case "LIST":
1819
1829
  case "CELL":
1820
- nodeType = NodeType2.CONTAINER;
1830
+ nodeType = NodeType3.CONTAINER;
1821
1831
  break;
1822
1832
  default:
1823
1833
  if (attributes.id === "android:id/input" || attributes.id === "android:id/inputArea") {
1824
- nodeType = NodeType2.FORM_ITEM;
1834
+ nodeType = NodeType3.FORM_ITEM;
1825
1835
  } else {
1826
- nodeType = NodeType2.CONTAINER;
1836
+ nodeType = NodeType3.CONTAINER;
1827
1837
  }
1828
1838
  break;
1829
1839
  }
1830
1840
  const xpath = getXPathForElement(node);
1831
1841
  const elementInfo = {
1832
1842
  id: nodeHashId,
1833
- indexId: generateId(nodeIndex++),
1843
+ indexId: nodeIndex++,
1834
1844
  nodeHashId,
1835
1845
  locator: xpath,
1836
1846
  attributes: __spreadValues({
@@ -1845,7 +1855,7 @@ function extractTextWithPosition2(initNode) {
1845
1855
  nodeType,
1846
1856
  nodePath: ""
1847
1857
  };
1848
- if (elementInfo.nodeType !== NodeType2.CONTAINER) {
1858
+ if (elementInfo.nodeType !== NodeType3.CONTAINER) {
1849
1859
  elementInfoArray.push(elementInfo);
1850
1860
  }
1851
1861
  }
package/dist/es/debug.js CHANGED
@@ -107,20 +107,7 @@ function writeFileSyncWithDir(filePath, content, options = {}) {
107
107
  }
108
108
  async function getElementInfos(page) {
109
109
  const captureElementSnapshot = await page.getElementInfos();
110
- const elementsPositionInfo = captureElementSnapshot.map(
111
- (elementInfo, index) => {
112
- var _a;
113
- return {
114
- label: ((_a = elementInfo.indexId) == null ? void 0 : _a.toString()) || index.toString(),
115
- x: elementInfo.rect.left,
116
- y: elementInfo.rect.top,
117
- width: elementInfo.rect.width,
118
- height: elementInfo.rect.height,
119
- attributes: elementInfo.attributes
120
- };
121
- }
122
- );
123
- const elementsPositionInfoWithoutText = elementsPositionInfo.filter(
110
+ const elementsPositionInfoWithoutText = captureElementSnapshot.filter(
124
111
  (elementInfo) => {
125
112
  if (elementInfo.attributes.nodeType === NodeType.TEXT) {
126
113
  return false;
@@ -129,7 +116,7 @@ async function getElementInfos(page) {
129
116
  }
130
117
  );
131
118
  return {
132
- elementsPositionInfo,
119
+ elementsPositionInfo: captureElementSnapshot,
133
120
  captureElementSnapshot,
134
121
  elementsPositionInfoWithoutText
135
122
  };
package/dist/es/index.js CHANGED
@@ -897,8 +897,13 @@ import assert from "assert";
897
897
  import { randomUUID } from "crypto";
898
898
  import { readFileSync } from "fs";
899
899
  import path from "path";
900
+ import { NodeType } from "@midscene/shared/constants";
900
901
  import { findNearestPackageJson } from "@midscene/shared/fs";
901
- import { base64Encoded, imageInfoOfBase64 } from "@midscene/shared/img";
902
+ import {
903
+ base64Encoded,
904
+ imageInfoOfBase64
905
+ } from "@midscene/shared/img";
906
+ import { compositeElementInfoImg } from "@midscene/shared/img";
902
907
 
903
908
  // src/web-element.ts
904
909
  var WebElementInfo = class {
@@ -908,7 +913,8 @@ var WebElementInfo = class {
908
913
  page,
909
914
  locator,
910
915
  id,
911
- attributes
916
+ attributes,
917
+ indexId
912
918
  }) {
913
919
  this.content = content;
914
920
  this.rect = rect;
@@ -920,6 +926,7 @@ var WebElementInfo = class {
920
926
  this.locator = locator;
921
927
  this.id = id;
922
928
  this.attributes = attributes;
929
+ this.indexId = indexId;
923
930
  }
924
931
  };
925
932
 
@@ -928,19 +935,24 @@ async function parseContextFromWebPage(page, _opt) {
928
935
  assert(page, "page is required");
929
936
  const url = page.url();
930
937
  const file = await page.screenshot();
931
- const screenshotBuffer = readFileSync(file);
932
938
  const screenshotBase64 = base64Encoded(file);
933
939
  const captureElementSnapshot = await page.getElementInfos();
934
- const elementsInfo = await alignElements(
935
- screenshotBuffer,
936
- captureElementSnapshot,
937
- page
938
- );
940
+ const elementsInfo = await alignElements(captureElementSnapshot, page);
941
+ const elementsPositionInfoWithoutText = elementsInfo.filter((elementInfo) => {
942
+ if (elementInfo.attributes.nodeType === NodeType.TEXT) {
943
+ return false;
944
+ }
945
+ return true;
946
+ });
939
947
  const size = await imageInfoOfBase64(screenshotBase64);
948
+ const screenshotBase64WithElementInfos = await compositeElementInfoImg({
949
+ inputImgBase64: screenshotBase64.split(";base64,").pop(),
950
+ elementsPositionInfo: elementsPositionInfoWithoutText
951
+ });
940
952
  return {
941
953
  content: elementsInfo,
942
954
  size,
943
- screenshotBase64,
955
+ screenshotBase64: `data:image/png;base64,${screenshotBase64WithElementInfos}`,
944
956
  url
945
957
  };
946
958
  }
@@ -952,13 +964,13 @@ async function getExtraReturnLogic() {
952
964
  return `${elementInfosScriptContent}midscene_element_inspector.webExtractTextWithPosition()`;
953
965
  }
954
966
  var sizeThreshold = 3;
955
- async function alignElements(screenshotBuffer, elements, page) {
967
+ async function alignElements(elements, page) {
956
968
  const validElements = elements.filter((item) => {
957
969
  return item.rect.height >= sizeThreshold && item.rect.width >= sizeThreshold;
958
970
  });
959
971
  const textsAligned = [];
960
972
  for (const item of validElements) {
961
- const { rect, id, content, attributes, locator } = item;
973
+ const { rect, id, content, attributes, locator, indexId } = item;
962
974
  textsAligned.push(
963
975
  new WebElementInfo({
964
976
  rect,
@@ -966,7 +978,8 @@ async function alignElements(screenshotBuffer, elements, page) {
966
978
  id,
967
979
  content,
968
980
  attributes,
969
- page
981
+ page,
982
+ indexId
970
983
  })
971
984
  );
972
985
  }
@@ -1520,13 +1533,13 @@ var PageTaskExecutor = class {
1520
1533
  const assertTask = await this.convertPlanToExecutable([assertPlan]);
1521
1534
  await taskExecutor.append(this.wrapExecutorWithScreenshot(assertTask[0]));
1522
1535
  const output = await taskExecutor.flush();
1523
- if (output.pass) {
1536
+ if (output == null ? void 0 : output.pass) {
1524
1537
  return {
1525
1538
  output: void 0,
1526
1539
  executor: taskExecutor
1527
1540
  };
1528
1541
  }
1529
- errorThought = output.thought;
1542
+ errorThought = (output == null ? void 0 : output.thought) || "unknown error";
1530
1543
  const now = Date.now();
1531
1544
  if (now - startTime < checkIntervalMs) {
1532
1545
  const timeRemaining = checkIntervalMs - (now - startTime);
@@ -1822,7 +1835,7 @@ var Page = class {
1822
1835
  deviceScaleFactor: window.devicePixelRatio
1823
1836
  };
1824
1837
  });
1825
- const path3 = getTmpFile("jpeg");
1838
+ const path3 = getTmpFile("png");
1826
1839
  await this.page.screenshot({
1827
1840
  path: path3,
1828
1841
  type: "png"
@@ -1928,7 +1941,7 @@ import { DOMParser } from "@xmldom/xmldom";
1928
1941
  import {
1929
1942
  CONTAINER_MINI_HEIGHT,
1930
1943
  CONTAINER_MINI_WIDTH,
1931
- NodeType
1944
+ NodeType as NodeType2
1932
1945
  } from "@midscene/shared/constants";
1933
1946
 
1934
1947
  // src/extractor/util.ts
@@ -1946,13 +1959,10 @@ function midsceneGenerateHash(content, rect) {
1946
1959
  const hashHex = (0, import_js_sha256.default)(combined);
1947
1960
  return hashHex.slice(0, 10);
1948
1961
  }
1949
- function generateId(numberId) {
1950
- return `${numberId}`;
1951
- }
1952
1962
 
1953
1963
  // src/extractor/constants.ts
1954
1964
  import {
1955
- NodeType as NodeType2,
1965
+ NodeType as NodeType3,
1956
1966
  TEXT_MAX_SIZE,
1957
1967
  TEXT_SIZE_THRESHOLD
1958
1968
  } from "@midscene/shared/constants";
@@ -2064,35 +2074,35 @@ function extractTextWithPosition2(initNode) {
2064
2074
  let nodeType;
2065
2075
  switch (node.nodeName.toUpperCase()) {
2066
2076
  case "TEXT":
2067
- nodeType = NodeType2.TEXT;
2077
+ nodeType = NodeType3.TEXT;
2068
2078
  break;
2069
2079
  case "IMAGE":
2070
- nodeType = NodeType2.IMG;
2080
+ nodeType = NodeType3.IMG;
2071
2081
  break;
2072
2082
  case "BUTTON":
2073
- nodeType = NodeType2.BUTTON;
2083
+ nodeType = NodeType3.BUTTON;
2074
2084
  break;
2075
2085
  case "SEARCHINPUT":
2076
2086
  case "INPUT":
2077
- nodeType = NodeType2.FORM_ITEM;
2087
+ nodeType = NodeType3.FORM_ITEM;
2078
2088
  break;
2079
2089
  case "NAV":
2080
2090
  case "LIST":
2081
2091
  case "CELL":
2082
- nodeType = NodeType2.CONTAINER;
2092
+ nodeType = NodeType3.CONTAINER;
2083
2093
  break;
2084
2094
  default:
2085
2095
  if (attributes.id === "android:id/input" || attributes.id === "android:id/inputArea") {
2086
- nodeType = NodeType2.FORM_ITEM;
2096
+ nodeType = NodeType3.FORM_ITEM;
2087
2097
  } else {
2088
- nodeType = NodeType2.CONTAINER;
2098
+ nodeType = NodeType3.CONTAINER;
2089
2099
  }
2090
2100
  break;
2091
2101
  }
2092
2102
  const xpath = getXPathForElement(node);
2093
2103
  const elementInfo = {
2094
2104
  id: nodeHashId,
2095
- indexId: generateId(nodeIndex++),
2105
+ indexId: nodeIndex++,
2096
2106
  nodeHashId,
2097
2107
  locator: xpath,
2098
2108
  attributes: __spreadValues({
@@ -2107,7 +2117,7 @@ function extractTextWithPosition2(initNode) {
2107
2117
  nodeType,
2108
2118
  nodePath: ""
2109
2119
  };
2110
- if (elementInfo.nodeType !== NodeType2.CONTAINER) {
2120
+ if (elementInfo.nodeType !== NodeType3.CONTAINER) {
2111
2121
  elementInfoArray.push(elementInfo);
2112
2122
  }
2113
2123
  }
@@ -2379,29 +2389,16 @@ function writeFileSyncWithDir(filePath, content, options = {}) {
2379
2389
  }
2380
2390
  async function getElementInfos(page) {
2381
2391
  const captureElementSnapshot = await page.getElementInfos();
2382
- const elementsPositionInfo = captureElementSnapshot.map(
2383
- (elementInfo, index) => {
2384
- var _a;
2385
- return {
2386
- label: ((_a = elementInfo.indexId) == null ? void 0 : _a.toString()) || index.toString(),
2387
- x: elementInfo.rect.left,
2388
- y: elementInfo.rect.top,
2389
- width: elementInfo.rect.width,
2390
- height: elementInfo.rect.height,
2391
- attributes: elementInfo.attributes
2392
- };
2393
- }
2394
- );
2395
- const elementsPositionInfoWithoutText = elementsPositionInfo.filter(
2392
+ const elementsPositionInfoWithoutText = captureElementSnapshot.filter(
2396
2393
  (elementInfo) => {
2397
- if (elementInfo.attributes.nodeType === NodeType2.TEXT) {
2394
+ if (elementInfo.attributes.nodeType === NodeType3.TEXT) {
2398
2395
  return false;
2399
2396
  }
2400
2397
  return true;
2401
2398
  }
2402
2399
  );
2403
2400
  return {
2404
- elementsPositionInfo,
2401
+ elementsPositionInfo: captureElementSnapshot,
2405
2402
  captureElementSnapshot,
2406
2403
  elementsPositionInfoWithoutText
2407
2404
  };
@@ -325,8 +325,13 @@ import assert from "assert";
325
325
  import { randomUUID } from "crypto";
326
326
  import { readFileSync } from "fs";
327
327
  import path from "path";
328
+ import { NodeType } from "@midscene/shared/constants";
328
329
  import { findNearestPackageJson } from "@midscene/shared/fs";
329
- import { base64Encoded, imageInfoOfBase64 } from "@midscene/shared/img";
330
+ import {
331
+ base64Encoded,
332
+ imageInfoOfBase64
333
+ } from "@midscene/shared/img";
334
+ import { compositeElementInfoImg } from "@midscene/shared/img";
330
335
  function reportFileName(tag = "web") {
331
336
  const dateTimeInFileName = (0, import_dayjs.default)().format("YYYY-MM-DD_HH-mm-ss-SSS");
332
337
  return `${tag}-${dateTimeInFileName}`;
@@ -371,8 +371,13 @@ import assert from "assert";
371
371
  import { randomUUID } from "crypto";
372
372
  import { readFileSync } from "fs";
373
373
  import path from "path";
374
+ import { NodeType } from "@midscene/shared/constants";
374
375
  import { findNearestPackageJson } from "@midscene/shared/fs";
375
- import { base64Encoded, imageInfoOfBase64 } from "@midscene/shared/img";
376
+ import {
377
+ base64Encoded,
378
+ imageInfoOfBase64
379
+ } from "@midscene/shared/img";
380
+ import { compositeElementInfoImg } from "@midscene/shared/img";
376
381
 
377
382
  // src/web-element.ts
378
383
  var WebElementInfo = class {
@@ -382,7 +387,8 @@ var WebElementInfo = class {
382
387
  page,
383
388
  locator,
384
389
  id,
385
- attributes
390
+ attributes,
391
+ indexId
386
392
  }) {
387
393
  this.content = content;
388
394
  this.rect = rect;
@@ -394,6 +400,7 @@ var WebElementInfo = class {
394
400
  this.locator = locator;
395
401
  this.id = id;
396
402
  this.attributes = attributes;
403
+ this.indexId = indexId;
397
404
  }
398
405
  };
399
406
 
@@ -402,19 +409,24 @@ async function parseContextFromWebPage(page, _opt) {
402
409
  assert(page, "page is required");
403
410
  const url = page.url();
404
411
  const file = await page.screenshot();
405
- const screenshotBuffer = readFileSync(file);
406
412
  const screenshotBase64 = base64Encoded(file);
407
413
  const captureElementSnapshot = await page.getElementInfos();
408
- const elementsInfo = await alignElements(
409
- screenshotBuffer,
410
- captureElementSnapshot,
411
- page
412
- );
414
+ const elementsInfo = await alignElements(captureElementSnapshot, page);
415
+ const elementsPositionInfoWithoutText = elementsInfo.filter((elementInfo) => {
416
+ if (elementInfo.attributes.nodeType === NodeType.TEXT) {
417
+ return false;
418
+ }
419
+ return true;
420
+ });
413
421
  const size = await imageInfoOfBase64(screenshotBase64);
422
+ const screenshotBase64WithElementInfos = await compositeElementInfoImg({
423
+ inputImgBase64: screenshotBase64.split(";base64,").pop(),
424
+ elementsPositionInfo: elementsPositionInfoWithoutText
425
+ });
414
426
  return {
415
427
  content: elementsInfo,
416
428
  size,
417
- screenshotBase64,
429
+ screenshotBase64: `data:image/png;base64,${screenshotBase64WithElementInfos}`,
418
430
  url
419
431
  };
420
432
  }
@@ -426,13 +438,13 @@ async function getExtraReturnLogic() {
426
438
  return `${elementInfosScriptContent}midscene_element_inspector.webExtractTextWithPosition()`;
427
439
  }
428
440
  var sizeThreshold = 3;
429
- async function alignElements(screenshotBuffer, elements, page) {
441
+ async function alignElements(elements, page) {
430
442
  const validElements = elements.filter((item) => {
431
443
  return item.rect.height >= sizeThreshold && item.rect.width >= sizeThreshold;
432
444
  });
433
445
  const textsAligned = [];
434
446
  for (const item of validElements) {
435
- const { rect, id, content, attributes, locator } = item;
447
+ const { rect, id, content, attributes, locator, indexId } = item;
436
448
  textsAligned.push(
437
449
  new WebElementInfo({
438
450
  rect,
@@ -440,7 +452,8 @@ async function alignElements(screenshotBuffer, elements, page) {
440
452
  id,
441
453
  content,
442
454
  attributes,
443
- page
455
+ page,
456
+ indexId
444
457
  })
445
458
  );
446
459
  }
@@ -994,13 +1007,13 @@ var PageTaskExecutor = class {
994
1007
  const assertTask = await this.convertPlanToExecutable([assertPlan]);
995
1008
  await taskExecutor.append(this.wrapExecutorWithScreenshot(assertTask[0]));
996
1009
  const output = await taskExecutor.flush();
997
- if (output.pass) {
1010
+ if (output == null ? void 0 : output.pass) {
998
1011
  return {
999
1012
  output: void 0,
1000
1013
  executor: taskExecutor
1001
1014
  };
1002
1015
  }
1003
- errorThought = output.thought;
1016
+ errorThought = (output == null ? void 0 : output.thought) || "unknown error";
1004
1017
  const now = Date.now();
1005
1018
  if (now - startTime < checkIntervalMs) {
1006
1019
  const timeRemaining = checkIntervalMs - (now - startTime);
@@ -1296,7 +1309,7 @@ var Page = class {
1296
1309
  deviceScaleFactor: window.devicePixelRatio
1297
1310
  };
1298
1311
  });
1299
- const path2 = getTmpFile("jpeg");
1312
+ const path2 = getTmpFile("png");
1300
1313
  await this.page.screenshot({
1301
1314
  path: path2,
1302
1315
  type: "png"
@@ -368,8 +368,13 @@ import assert from "assert";
368
368
  import { randomUUID } from "crypto";
369
369
  import { readFileSync } from "fs";
370
370
  import path from "path";
371
+ import { NodeType } from "@midscene/shared/constants";
371
372
  import { findNearestPackageJson } from "@midscene/shared/fs";
372
- import { base64Encoded, imageInfoOfBase64 } from "@midscene/shared/img";
373
+ import {
374
+ base64Encoded,
375
+ imageInfoOfBase64
376
+ } from "@midscene/shared/img";
377
+ import { compositeElementInfoImg } from "@midscene/shared/img";
373
378
 
374
379
  // src/web-element.ts
375
380
  var WebElementInfo = class {
@@ -379,7 +384,8 @@ var WebElementInfo = class {
379
384
  page,
380
385
  locator,
381
386
  id,
382
- attributes
387
+ attributes,
388
+ indexId
383
389
  }) {
384
390
  this.content = content;
385
391
  this.rect = rect;
@@ -391,6 +397,7 @@ var WebElementInfo = class {
391
397
  this.locator = locator;
392
398
  this.id = id;
393
399
  this.attributes = attributes;
400
+ this.indexId = indexId;
394
401
  }
395
402
  };
396
403
 
@@ -399,19 +406,24 @@ async function parseContextFromWebPage(page, _opt) {
399
406
  assert(page, "page is required");
400
407
  const url = page.url();
401
408
  const file = await page.screenshot();
402
- const screenshotBuffer = readFileSync(file);
403
409
  const screenshotBase64 = base64Encoded(file);
404
410
  const captureElementSnapshot = await page.getElementInfos();
405
- const elementsInfo = await alignElements(
406
- screenshotBuffer,
407
- captureElementSnapshot,
408
- page
409
- );
411
+ const elementsInfo = await alignElements(captureElementSnapshot, page);
412
+ const elementsPositionInfoWithoutText = elementsInfo.filter((elementInfo) => {
413
+ if (elementInfo.attributes.nodeType === NodeType.TEXT) {
414
+ return false;
415
+ }
416
+ return true;
417
+ });
410
418
  const size = await imageInfoOfBase64(screenshotBase64);
419
+ const screenshotBase64WithElementInfos = await compositeElementInfoImg({
420
+ inputImgBase64: screenshotBase64.split(";base64,").pop(),
421
+ elementsPositionInfo: elementsPositionInfoWithoutText
422
+ });
411
423
  return {
412
424
  content: elementsInfo,
413
425
  size,
414
- screenshotBase64,
426
+ screenshotBase64: `data:image/png;base64,${screenshotBase64WithElementInfos}`,
415
427
  url
416
428
  };
417
429
  }
@@ -423,13 +435,13 @@ async function getExtraReturnLogic() {
423
435
  return `${elementInfosScriptContent}midscene_element_inspector.webExtractTextWithPosition()`;
424
436
  }
425
437
  var sizeThreshold = 3;
426
- async function alignElements(screenshotBuffer, elements, page) {
438
+ async function alignElements(elements, page) {
427
439
  const validElements = elements.filter((item) => {
428
440
  return item.rect.height >= sizeThreshold && item.rect.width >= sizeThreshold;
429
441
  });
430
442
  const textsAligned = [];
431
443
  for (const item of validElements) {
432
- const { rect, id, content, attributes, locator } = item;
444
+ const { rect, id, content, attributes, locator, indexId } = item;
433
445
  textsAligned.push(
434
446
  new WebElementInfo({
435
447
  rect,
@@ -437,7 +449,8 @@ async function alignElements(screenshotBuffer, elements, page) {
437
449
  id,
438
450
  content,
439
451
  attributes,
440
- page
452
+ page,
453
+ indexId
441
454
  })
442
455
  );
443
456
  }
@@ -991,13 +1004,13 @@ var PageTaskExecutor = class {
991
1004
  const assertTask = await this.convertPlanToExecutable([assertPlan]);
992
1005
  await taskExecutor.append(this.wrapExecutorWithScreenshot(assertTask[0]));
993
1006
  const output = await taskExecutor.flush();
994
- if (output.pass) {
1007
+ if (output == null ? void 0 : output.pass) {
995
1008
  return {
996
1009
  output: void 0,
997
1010
  executor: taskExecutor
998
1011
  };
999
1012
  }
1000
- errorThought = output.thought;
1013
+ errorThought = (output == null ? void 0 : output.thought) || "unknown error";
1001
1014
  const now = Date.now();
1002
1015
  if (now - startTime < checkIntervalMs) {
1003
1016
  const timeRemaining = checkIntervalMs - (now - startTime);
@@ -1165,7 +1178,7 @@ var Page = class {
1165
1178
  deviceScaleFactor: window.devicePixelRatio
1166
1179
  };
1167
1180
  });
1168
- const path2 = getTmpFile("jpeg");
1181
+ const path2 = getTmpFile("png");
1169
1182
  await this.page.screenshot({
1170
1183
  path: path2,
1171
1184
  type: "png"