@letsrunit/playwright 0.7.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -5,6 +5,7 @@ import rehypeStringify from 'rehype-stringify';
5
5
  import { unified } from 'unified';
6
6
  import stringify from 'fast-json-stable-stringify';
7
7
  import { JSDOM } from 'jsdom';
8
+ import * as Diff from 'diff';
8
9
 
9
10
  // src/browser.ts
10
11
  async function browse(browser, options = {}) {
@@ -83,11 +84,11 @@ function formatDateForInput(date, type) {
83
84
  return `${yyyy}-${mm}-${dd}`;
84
85
  }
85
86
  }
86
- function formatDate(d, format) {
87
+ function formatDate(d, format2) {
87
88
  const dd = String(d.getDate()).padStart(2, "0");
88
89
  const mm = String(d.getMonth() + 1).padStart(2, "0");
89
90
  const yyyy = String(d.getFullYear());
90
- return format.replace("DD", dd).replace("MM", mm).replace("YYYY", yyyy);
91
+ return format2.replace("DD", dd).replace("MM", mm).replace("YYYY", yyyy);
91
92
  }
92
93
  function getMonthNames(locale) {
93
94
  const formatter = new Intl.DateTimeFormat(locale, { month: "long" });
@@ -1226,21 +1227,32 @@ async function formatHtml(page) {
1226
1227
  }
1227
1228
 
1228
1229
  // src/fuzzy-locator.ts
1230
+ function debug(...args) {
1231
+ if (process.env.LETSRUNIT_DEBUG_FUZZY_LOCATOR === "1") {
1232
+ console.log("[fuzzyLocator]", ...args);
1233
+ }
1234
+ }
1229
1235
  async function fuzzyLocator(page, selector) {
1236
+ debug("input selector:", selector);
1230
1237
  const primary = page.locator(selector);
1231
1238
  const candidates = [
1232
- tryRelaxNameToHasText(page, selector),
1233
- tryTagInsteadOfRole(page, selector),
1234
- tryRoleNameProximity(page, selector),
1235
- tryFieldAlternative(page, selector),
1236
- tryAsField(page, selector)
1239
+ { name: "relaxNameToHasText", locator: tryRelaxNameToHasText(page, selector) },
1240
+ { name: "tagInsteadOfRole", locator: tryTagInsteadOfRole(page, selector) },
1241
+ { name: "roleNameProximity", locator: tryRoleNameProximity(page, selector) },
1242
+ { name: "fieldAlternative", locator: tryFieldAlternative(page, selector) },
1243
+ { name: "asField", locator: tryAsField(page, selector) }
1237
1244
  ];
1238
1245
  let combined = primary;
1246
+ const enabled = [];
1239
1247
  for (const candidate of candidates) {
1240
- if (!candidate) continue;
1241
- combined = combined.or(candidate);
1248
+ if (!candidate.locator) continue;
1249
+ enabled.push(candidate.name);
1250
+ combined = combined.or(candidate.locator);
1242
1251
  }
1243
- return combined.first();
1252
+ debug("enabled fallbacks:", enabled.length ? enabled.join(", ") : "(none)");
1253
+ const result = combined.first();
1254
+ debug("returning locator:", result.toString());
1255
+ return result;
1244
1256
  }
1245
1257
  function tryRelaxNameToHasText(page, selector) {
1246
1258
  const matchAnyNameFull = selector.match(/^(role=.*)\[name="([^"]+)"i?](.*)$/i);
@@ -1658,12 +1670,336 @@ async function screenshotWithMask(page, options) {
1658
1670
  }
1659
1671
  }
1660
1672
 
1673
+ // src/utils/type-check.ts
1674
+ function isPage(page) {
1675
+ return typeof page.content === "function" && typeof page.url === "function" && typeof page.screenshot === "function";
1676
+ }
1677
+
1678
+ // src/scrub-html.ts
1679
+ var HTML_MIN_ATTR_THRESHOLD = 25e4;
1680
+ var HTML_LIMIT_LISTS_THRESHOLD = 4e5;
1681
+ var HTML_MAIN_ONLY_THRESHOLD = 6e5;
1682
+ function getDefaults(contentLength) {
1683
+ return {
1684
+ dropHidden: true,
1685
+ dropHead: true,
1686
+ dropSvg: false,
1687
+ pickMain: contentLength >= HTML_MAIN_ONLY_THRESHOLD,
1688
+ stripAttributes: contentLength >= HTML_MIN_ATTR_THRESHOLD ? 2 : 1,
1689
+ normalizeWhitespace: true,
1690
+ dropComments: true,
1691
+ replaceBrInHeadings: true,
1692
+ limitLists: contentLength >= HTML_LIMIT_LISTS_THRESHOLD ? 20 : -1,
1693
+ dropUtilityClasses: false
1694
+ };
1695
+ }
1696
+ var ALLOWED_ATTRS = {
1697
+ match: /* @__PURE__ */ new Set([
1698
+ // identity/semantics
1699
+ "id",
1700
+ "class",
1701
+ "role",
1702
+ // internationalization
1703
+ "lang",
1704
+ "dir",
1705
+ // anchors & media
1706
+ "href",
1707
+ "title",
1708
+ "target",
1709
+ "rel",
1710
+ "src",
1711
+ "alt",
1712
+ "width",
1713
+ "height",
1714
+ "loading",
1715
+ // tables
1716
+ "scope",
1717
+ "headers",
1718
+ "colspan",
1719
+ "rowspan",
1720
+ // forms (pure semantics—doesn’t change structure)
1721
+ "name",
1722
+ "value",
1723
+ "type",
1724
+ "for",
1725
+ "placeholder",
1726
+ "checked",
1727
+ "selected",
1728
+ "multiple",
1729
+ "method",
1730
+ "action",
1731
+ // time, figure, etc.
1732
+ "datetime"
1733
+ ]),
1734
+ regexp: /^aria-[\w-]+|^data-[\w-]+$/i
1735
+ // ARIA attributes & data-* attributes
1736
+ };
1737
+ var ALLOWED_ATTRS_AGGRESSIVE = {
1738
+ match: /* @__PURE__ */ new Set([
1739
+ // structuur / algemene selectors
1740
+ "id",
1741
+ "class",
1742
+ "role",
1743
+ // links / media
1744
+ "href",
1745
+ "src",
1746
+ "alt",
1747
+ "title",
1748
+ // tables
1749
+ "scope",
1750
+ // forms / velden
1751
+ "name",
1752
+ "type",
1753
+ "for",
1754
+ "placeholder",
1755
+ "value",
1756
+ "checked",
1757
+ "selected",
1758
+ // ARIA voor Playwright getByRole/getByLabel
1759
+ "aria-label",
1760
+ "aria-labelledby",
1761
+ "aria-describedby",
1762
+ // veelgebruikte test selectors
1763
+ "data-testid",
1764
+ "data-test-id",
1765
+ "data-cy",
1766
+ "data-qa"
1767
+ ]),
1768
+ regexp: null
1769
+ };
1770
+ var HIDDEN_SELECTORS = [
1771
+ "[hidden]",
1772
+ "[inert]",
1773
+ '[aria-hidden="true"]',
1774
+ '[style*="display:none"]',
1775
+ '[style*="visibility:hidden"]',
1776
+ '[style*="opacity:0"]'
1777
+ ].join(",");
1778
+ var ALWAYS_DROP = [
1779
+ "script",
1780
+ "style",
1781
+ "template",
1782
+ "noscript",
1783
+ "slot",
1784
+ "object",
1785
+ "embed"
1786
+ ];
1787
+ async function scrubHtml(page, opts = {}) {
1788
+ if (isPage(page)) page = { html: await page.content(), url: page.url() };
1789
+ return await memoizedScrubHtml(page, opts);
1790
+ }
1791
+ var memoizedScrubHtml = memoize(realScrubHtml, {
1792
+ max: 16,
1793
+ ttl: 10 * 6e4,
1794
+ cacheKey: (args) => stringify({ html: args[0].html, url: args[0].url, ...args[1] })
1795
+ });
1796
+ async function realScrubHtml({ html, url }, opts = {}) {
1797
+ const o = { ...getDefaults(html.length), ...opts };
1798
+ const dom = new JSDOM(html, { url });
1799
+ const doc = dom.window.document;
1800
+ if (o.pickMain) pickMain(doc);
1801
+ dropInfraAndSvg(doc, !!o.dropSvg);
1802
+ if (o.dropHidden) dropHiddenTrees(doc);
1803
+ if (o.stripAttributes) stripAttributesAndSanitize(doc, o.stripAttributes);
1804
+ if (o.dropComments) dropHtmlComments(doc);
1805
+ if (o.replaceBrInHeadings) replaceBrsInHeadings(doc);
1806
+ if (o.limitLists >= 0) limitListsAndRows(doc, o.limitLists);
1807
+ if (o.dropUtilityClasses) stripUtilityClasses(doc);
1808
+ if (o.normalizeWhitespace) normalizeWhitespace(doc.body);
1809
+ return doc.body.innerHTML;
1810
+ }
1811
+ function hasHiddenAncestor(el) {
1812
+ let p = el.parentElement;
1813
+ while (p) {
1814
+ if (p.hasAttribute("hidden") || p.hasAttribute("inert") || p.getAttribute("aria-hidden") === "true") return true;
1815
+ const style = p.getAttribute("style") || "";
1816
+ if (/\bdisplay\s*:\s*none\b/i.test(style)) return true;
1817
+ if (/\bvisibility\s*:\s*hidden\b/i.test(style)) return true;
1818
+ if (/\bopacity\s*:\s*0(?:\D|$)/i.test(style)) return true;
1819
+ p = p.parentElement;
1820
+ }
1821
+ return false;
1822
+ }
1823
+ function normalizeWhitespace(root) {
1824
+ const preLike = /* @__PURE__ */ new Set(["PRE", "CODE", "SAMP", "KBD"]);
1825
+ const doc = root.ownerDocument;
1826
+ const walker = doc.createTreeWalker(
1827
+ root,
1828
+ 4
1829
+ /*NodeFilter.SHOW_TEXT*/
1830
+ );
1831
+ const changes = [];
1832
+ let node;
1833
+ while (node = walker.nextNode()) {
1834
+ const text = node;
1835
+ const parent = text.parentElement;
1836
+ if (!parent) continue;
1837
+ if (preLike.has(parent.tagName)) continue;
1838
+ const v = text.nodeValue ?? "";
1839
+ const collapsed = v.replace(/\s+/g, " ");
1840
+ if (collapsed !== v) changes.push(text);
1841
+ }
1842
+ for (const t of changes) {
1843
+ const parent = t.parentElement;
1844
+ const isBlockish = /^(P|LI|DIV|SECTION|ARTICLE|ASIDE|HEADER|FOOTER|MAIN|NAV|H[1-6]|BLOCKQUOTE|FIGCAPTION|TD|TH)$/i.test(parent.tagName);
1845
+ t.nodeValue = (t.nodeValue || "").replace(/\s+/g, " ");
1846
+ if (isBlockish) t.nodeValue = (t.nodeValue || "").trim();
1847
+ }
1848
+ }
1849
+ function pickMain(doc) {
1850
+ const main = doc.querySelector("main");
1851
+ if (!main) return false;
1852
+ const clone = main.cloneNode(true);
1853
+ doc.body.innerHTML = "";
1854
+ doc.body.appendChild(clone);
1855
+ return true;
1856
+ }
1857
+ function dropInfraAndSvg(doc, dropSvg) {
1858
+ const toDrop = [...ALWAYS_DROP, dropSvg ? "svg" : ""].filter(Boolean).join(",");
1859
+ if (!toDrop) return;
1860
+ doc.querySelectorAll(toDrop).forEach((el) => el.remove());
1861
+ }
1862
+ function dropHiddenTrees(doc) {
1863
+ doc.querySelectorAll(HIDDEN_SELECTORS).forEach((el) => el.remove());
1864
+ const all = [...doc.body.querySelectorAll("*")];
1865
+ for (const el of all) {
1866
+ if (!el.isConnected) continue;
1867
+ if (hasHiddenAncestor(el)) el.remove();
1868
+ }
1869
+ }
1870
+ function stripAttributesAndSanitize(doc, level) {
1871
+ if (!level) return;
1872
+ const all = [...doc.body.querySelectorAll("*")];
1873
+ for (const el of all) {
1874
+ const isSvg = el.namespaceURI === "http://www.w3.org/2000/svg";
1875
+ for (const { name } of [...el.attributes]) {
1876
+ const lower = name.toLowerCase();
1877
+ if (lower.startsWith("on")) {
1878
+ el.removeAttribute(name);
1879
+ continue;
1880
+ }
1881
+ if (lower === "style") {
1882
+ el.removeAttribute(name);
1883
+ continue;
1884
+ }
1885
+ if (isSvg) continue;
1886
+ const allowed = level === 1 ? ALLOWED_ATTRS : ALLOWED_ATTRS_AGGRESSIVE;
1887
+ if (!allowed.match.has(lower) && !allowed.regexp?.test(name)) {
1888
+ el.removeAttribute(name);
1889
+ }
1890
+ }
1891
+ }
1892
+ doc.querySelectorAll("a[href]").forEach((a) => {
1893
+ const href = a.getAttribute("href") || "";
1894
+ if (/^\s*javascript:/i.test(href)) a.removeAttribute("href");
1895
+ });
1896
+ }
1897
+ function dropHtmlComments(doc) {
1898
+ const nf = doc.defaultView?.NodeFilter;
1899
+ const SHOW_COMMENT = nf?.SHOW_COMMENT ?? 128;
1900
+ const walker = doc.createTreeWalker(doc, SHOW_COMMENT);
1901
+ const toRemove = [];
1902
+ let n;
1903
+ while (n = walker.nextNode()) toRemove.push(n);
1904
+ toRemove.forEach((c) => c.parentNode?.removeChild(c));
1905
+ }
1906
+ function replaceBrsInHeadings(doc) {
1907
+ doc.querySelectorAll("h1, h2, h3, h4, h5, h6").forEach((h) => {
1908
+ h.querySelectorAll("br").forEach((br) => {
1909
+ const space = doc.createTextNode(" ");
1910
+ br.replaceWith(space);
1911
+ });
1912
+ });
1913
+ }
1914
+ var UTILITY_VARIANT_RE = /:/;
1915
+ var UTILITY_PREFIX_RE = /^-?(?:p[xytblrse]?|m[xytblrse]?|gap|space-[xy]|w|h|min-w|min-h|max-w|max-h|size|basis|inset|top|right|bottom|left|start|end|z|text|bg|border|ring|shadow|outline|fill|stroke|divide|accent|caret|from|via|to|decoration|font|leading|tracking|indent|line-clamp|columns|aspect|object|opacity|rotate|scale|translate|skew|transition|duration|ease|delay|animate|rounded|overflow|overscroll|scroll|snap|touch|cursor|pointer-events|select|resize|flex|grid|col|row|order|auto-cols|auto-rows|items|justify|content|self|place|float|clear|list|whitespace|break|hyphens|mix-blend|bg-blend|backdrop|d|g|fs|fw|lh|align|position)-/i;
1916
+ var UTILITY_STANDALONE = /* @__PURE__ */ new Set([
1917
+ "flex",
1918
+ "grid",
1919
+ "block",
1920
+ "hidden",
1921
+ "inline",
1922
+ "inline-block",
1923
+ "inline-flex",
1924
+ "inline-grid",
1925
+ "contents",
1926
+ "flow-root",
1927
+ "list-item",
1928
+ "table",
1929
+ "container",
1930
+ "truncate",
1931
+ "grow",
1932
+ "shrink",
1933
+ "static",
1934
+ "relative",
1935
+ "absolute",
1936
+ "fixed",
1937
+ "sticky",
1938
+ "visible",
1939
+ "invisible",
1940
+ "collapse",
1941
+ "isolate",
1942
+ "underline",
1943
+ "overline",
1944
+ "line-through",
1945
+ "no-underline",
1946
+ "uppercase",
1947
+ "lowercase",
1948
+ "capitalize",
1949
+ "normal-case",
1950
+ "italic",
1951
+ "not-italic",
1952
+ "antialiased",
1953
+ "subpixel-antialiased",
1954
+ "sr-only",
1955
+ "not-sr-only",
1956
+ "clearfix",
1957
+ "row",
1958
+ "col"
1959
+ ]);
1960
+ function isUtilityClass(token) {
1961
+ if (UTILITY_VARIANT_RE.test(token)) return true;
1962
+ const base = token.startsWith("-") ? token.slice(1) : token;
1963
+ if (UTILITY_STANDALONE.has(base)) return true;
1964
+ return UTILITY_PREFIX_RE.test(token);
1965
+ }
1966
+ function stripUtilityClasses(doc) {
1967
+ for (const el of doc.body.querySelectorAll("[class]")) {
1968
+ const kept = el.className.split(/\s+/).filter((t) => t && !isUtilityClass(t));
1969
+ if (kept.length === 0) el.removeAttribute("class");
1970
+ else el.className = kept.join(" ");
1971
+ }
1972
+ }
1973
+ function limitListsAndRows(doc, limit) {
1974
+ doc.querySelectorAll("ul, ol").forEach((list) => {
1975
+ const items = Array.from(list.children).filter((c) => c.tagName === "LI");
1976
+ for (let i = limit; i < items.length; i++) items[i].remove();
1977
+ });
1978
+ const rowContainers = doc.querySelectorAll("table, thead, tbody, tfoot");
1979
+ rowContainers.forEach((container) => {
1980
+ const rows = Array.from(container.children).filter((c) => c.tagName === "TR");
1981
+ for (let i = limit; i < rows.length; i++) rows[i].remove();
1982
+ });
1983
+ }
1984
+
1661
1985
  // src/snapshot.ts
1662
- async function snapshot(page) {
1986
+ async function snapshot(page, opts = {}) {
1663
1987
  await sleep(500);
1664
1988
  await waitForDomIdle(page);
1665
1989
  const [url, html, file] = await Promise.all([page.url(), getContentWithMarkedHidden(page), screenshot(page)]);
1666
- return { url, html, screenshot: file };
1990
+ const finalHtml = opts.dropUtilityClasses ? await realScrubHtml({ html, url }, {
1991
+ dropHidden: false,
1992
+ dropHead: false,
1993
+ dropSvg: false,
1994
+ pickMain: false,
1995
+ stripAttributes: 0,
1996
+ normalizeWhitespace: false,
1997
+ dropComments: false,
1998
+ replaceBrInHeadings: false,
1999
+ limitLists: -1,
2000
+ dropUtilityClasses: true
2001
+ }) : html;
2002
+ return { url, html: finalHtml, screenshot: file };
1667
2003
  }
1668
2004
  async function getContentWithMarkedHidden(page) {
1669
2005
  try {
@@ -3190,258 +3526,17 @@ async function suppressInterferences(page, opts = {}) {
3190
3526
  await sleep(pollIntervalMs);
3191
3527
  }
3192
3528
  }
3193
-
3194
- // src/utils/type-check.ts
3195
- function isPage(page) {
3196
- return typeof page.content === "function" && typeof page.url === "function" && typeof page.screenshot === "function";
3197
- }
3198
-
3199
- // src/scrub-html.ts
3200
- var HTML_MIN_ATTR_THRESHOLD = 25e4;
3201
- var HTML_LIMIT_LISTS_THRESHOLD = 4e5;
3202
- var HTML_MAIN_ONLY_THRESHOLD = 6e5;
3203
- function getDefaults(contentLength) {
3204
- return {
3205
- dropHidden: true,
3206
- dropHead: true,
3207
- dropSvg: false,
3208
- pickMain: contentLength >= HTML_MAIN_ONLY_THRESHOLD,
3209
- stripAttributes: contentLength >= HTML_MIN_ATTR_THRESHOLD ? 2 : 1,
3210
- normalizeWhitespace: true,
3211
- dropComments: true,
3212
- replaceBrInHeadings: true,
3213
- limitLists: contentLength >= HTML_LIMIT_LISTS_THRESHOLD ? 20 : -1
3214
- };
3215
- }
3216
- var ALLOWED_ATTRS = {
3217
- match: /* @__PURE__ */ new Set([
3218
- // identity/semantics
3219
- "id",
3220
- "class",
3221
- "role",
3222
- // internationalization
3223
- "lang",
3224
- "dir",
3225
- // anchors & media
3226
- "href",
3227
- "title",
3228
- "target",
3229
- "rel",
3230
- "src",
3231
- "alt",
3232
- "width",
3233
- "height",
3234
- "loading",
3235
- // tables
3236
- "scope",
3237
- "headers",
3238
- "colspan",
3239
- "rowspan",
3240
- // forms (pure semantics—doesn’t change structure)
3241
- "name",
3242
- "value",
3243
- "type",
3244
- "for",
3245
- "placeholder",
3246
- "checked",
3247
- "selected",
3248
- "multiple",
3249
- "method",
3250
- "action",
3251
- // time, figure, etc.
3252
- "datetime"
3253
- ]),
3254
- regexp: /^aria-[\w-]+|^data-[\w-]+$/i
3255
- // ARIA attributes & data-* attributes
3256
- };
3257
- var ALLOWED_ATTRS_AGGRESSIVE = {
3258
- match: /* @__PURE__ */ new Set([
3259
- // structuur / algemene selectors
3260
- "id",
3261
- "class",
3262
- "role",
3263
- // links / media
3264
- "href",
3265
- "src",
3266
- "alt",
3267
- "title",
3268
- // tables
3269
- "scope",
3270
- // forms / velden
3271
- "name",
3272
- "type",
3273
- "for",
3274
- "placeholder",
3275
- "value",
3276
- "checked",
3277
- "selected",
3278
- // ARIA voor Playwright getByRole/getByLabel
3279
- "aria-label",
3280
- "aria-labelledby",
3281
- "aria-describedby",
3282
- // veelgebruikte test selectors
3283
- "data-testid",
3284
- "data-test-id",
3285
- "data-cy",
3286
- "data-qa"
3287
- ]),
3288
- regexp: null
3289
- };
3290
- var HIDDEN_SELECTORS = [
3291
- "[hidden]",
3292
- "[inert]",
3293
- '[aria-hidden="true"]',
3294
- '[style*="display:none"]',
3295
- '[style*="visibility:hidden"]',
3296
- '[style*="opacity:0"]'
3297
- ].join(",");
3298
- var ALWAYS_DROP = [
3299
- "script",
3300
- "style",
3301
- "template",
3302
- "noscript",
3303
- "slot",
3304
- "object",
3305
- "embed"
3306
- ];
3307
- async function scrubHtml(page, opts = {}) {
3308
- if (isPage(page)) page = { html: await page.content(), url: page.url() };
3309
- return await memoizedScrubHtml(page, opts);
3310
- }
3311
- var memoizedScrubHtml = memoize(realScrubHtml, {
3312
- max: 16,
3313
- ttl: 10 * 6e4,
3314
- cacheKey: (args) => stringify({ html: args[0].html, url: args[0].url, ...args[1] })
3315
- });
3316
- async function realScrubHtml({ html, url }, opts = {}) {
3317
- const o = { ...getDefaults(html.length), ...opts };
3318
- const dom = new JSDOM(html, { url });
3319
- const doc = dom.window.document;
3320
- if (o.pickMain) pickMain(doc);
3321
- dropInfraAndSvg(doc, !!o.dropSvg);
3322
- if (o.dropHidden) dropHiddenTrees(doc);
3323
- if (o.stripAttributes) stripAttributesAndSanitize(doc, o.stripAttributes);
3324
- if (o.dropComments) dropHtmlComments(doc);
3325
- if (o.replaceBrInHeadings) replaceBrsInHeadings(doc);
3326
- if (o.limitLists >= 0) limitListsAndRows(doc, o.limitLists);
3327
- if (o.normalizeWhitespace) normalizeWhitespace(doc.body);
3328
- return doc.body.innerHTML;
3329
- }
3330
- function hasHiddenAncestor(el) {
3331
- let p = el.parentElement;
3332
- while (p) {
3333
- if (p.hasAttribute("hidden") || p.hasAttribute("inert") || p.getAttribute("aria-hidden") === "true") return true;
3334
- const style = p.getAttribute("style") || "";
3335
- if (/\bdisplay\s*:\s*none\b/i.test(style)) return true;
3336
- if (/\bvisibility\s*:\s*hidden\b/i.test(style)) return true;
3337
- if (/\bopacity\s*:\s*0(?:\D|$)/i.test(style)) return true;
3338
- p = p.parentElement;
3339
- }
3340
- return false;
3341
- }
3342
- function normalizeWhitespace(root) {
3343
- const preLike = /* @__PURE__ */ new Set(["PRE", "CODE", "SAMP", "KBD"]);
3344
- const doc = root.ownerDocument;
3345
- const walker = doc.createTreeWalker(
3346
- root,
3347
- 4
3348
- /*NodeFilter.SHOW_TEXT*/
3349
- );
3350
- const changes = [];
3351
- let node;
3352
- while (node = walker.nextNode()) {
3353
- const text = node;
3354
- const parent = text.parentElement;
3355
- if (!parent) continue;
3356
- if (preLike.has(parent.tagName)) continue;
3357
- const v = text.nodeValue ?? "";
3358
- const collapsed = v.replace(/\s+/g, " ");
3359
- if (collapsed !== v) changes.push(text);
3360
- }
3361
- for (const t of changes) {
3362
- const parent = t.parentElement;
3363
- const isBlockish = /^(P|LI|DIV|SECTION|ARTICLE|ASIDE|HEADER|FOOTER|MAIN|NAV|H[1-6]|BLOCKQUOTE|FIGCAPTION|TD|TH)$/i.test(parent.tagName);
3364
- t.nodeValue = (t.nodeValue || "").replace(/\s+/g, " ");
3365
- if (isBlockish) t.nodeValue = (t.nodeValue || "").trim();
3366
- }
3367
- }
3368
- function pickMain(doc) {
3369
- const main = doc.querySelector("main");
3370
- if (!main) return false;
3371
- const clone = main.cloneNode(true);
3372
- doc.body.innerHTML = "";
3373
- doc.body.appendChild(clone);
3374
- return true;
3375
- }
3376
- function dropInfraAndSvg(doc, dropSvg) {
3377
- const toDrop = [...ALWAYS_DROP, dropSvg ? "svg" : ""].filter(Boolean).join(",");
3378
- if (!toDrop) return;
3379
- doc.querySelectorAll(toDrop).forEach((el) => el.remove());
3380
- }
3381
- function dropHiddenTrees(doc) {
3382
- doc.querySelectorAll(HIDDEN_SELECTORS).forEach((el) => el.remove());
3383
- const all = [...doc.body.querySelectorAll("*")];
3384
- for (const el of all) {
3385
- if (!el.isConnected) continue;
3386
- if (hasHiddenAncestor(el)) el.remove();
3387
- }
3388
- }
3389
- function stripAttributesAndSanitize(doc, level) {
3390
- if (!level) return;
3391
- const all = [...doc.body.querySelectorAll("*")];
3392
- for (const el of all) {
3393
- const isSvg = el.namespaceURI === "http://www.w3.org/2000/svg";
3394
- for (const { name } of [...el.attributes]) {
3395
- const lower = name.toLowerCase();
3396
- if (lower.startsWith("on")) {
3397
- el.removeAttribute(name);
3398
- continue;
3399
- }
3400
- if (lower === "style") {
3401
- el.removeAttribute(name);
3402
- continue;
3403
- }
3404
- if (isSvg) continue;
3405
- const allowed = level === 1 ? ALLOWED_ATTRS : ALLOWED_ATTRS_AGGRESSIVE;
3406
- if (!allowed.match.has(lower) && !allowed.regexp?.test(name)) {
3407
- el.removeAttribute(name);
3408
- }
3409
- }
3410
- }
3411
- doc.querySelectorAll("a[href]").forEach((a) => {
3412
- const href = a.getAttribute("href") || "";
3413
- if (/^\s*javascript:/i.test(href)) a.removeAttribute("href");
3414
- });
3415
- }
3416
- function dropHtmlComments(doc) {
3417
- const nf = doc.defaultView?.NodeFilter;
3418
- const SHOW_COMMENT = nf?.SHOW_COMMENT ?? 128;
3419
- const walker = doc.createTreeWalker(doc, SHOW_COMMENT);
3420
- const toRemove = [];
3421
- let n;
3422
- while (n = walker.nextNode()) toRemove.push(n);
3423
- toRemove.forEach((c) => c.parentNode?.removeChild(c));
3529
+ async function format(rawHtml, url) {
3530
+ const html = await scrubHtml({ html: rawHtml, url });
3531
+ return await formatHtml(html);
3424
3532
  }
3425
- function replaceBrsInHeadings(doc) {
3426
- doc.querySelectorAll("h1, h2, h3, h4, h5, h6").forEach((h) => {
3427
- h.querySelectorAll("br").forEach((br) => {
3428
- const space = doc.createTextNode(" ");
3429
- br.replaceWith(space);
3430
- });
3431
- });
3432
- }
3433
- function limitListsAndRows(doc, limit) {
3434
- doc.querySelectorAll("ul, ol").forEach((list) => {
3435
- const items = Array.from(list.children).filter((c) => c.tagName === "LI");
3436
- for (let i = limit; i < items.length; i++) items[i].remove();
3437
- });
3438
- const rowContainers = doc.querySelectorAll("table, thead, tbody, tfoot");
3439
- rowContainers.forEach((container) => {
3440
- const rows = Array.from(container.children).filter((c) => c.tagName === "TR");
3441
- for (let i = limit; i < rows.length; i++) rows[i].remove();
3442
- });
3533
+ async function unifiedHtmlDiff(old, current) {
3534
+ if (isPage(old)) old = { html: await old.content(), url: old.url() };
3535
+ if (isPage(current)) current = { html: await current.content(), url: current.url() };
3536
+ const [a, b] = await Promise.all([format(old.html, old.url), format(current.html, current.url)]);
3537
+ return Diff.createTwoFilesPatch("before.html", "after.html", a, b);
3443
3538
  }
3444
3539
 
3445
- export { browse, createDateEngine, createFieldEngine, formatDate, formatDateForInput, formatHtml, fuzzyLocator, getMonthNames, realScrubHtml, screenshot, screenshotElement, scrollToCenter, scrubHtml, setFieldValue, snapshot, suppressInterferences, waitAfterInteraction, waitForAnimationsToFinish, waitForDomIdle, waitForIdle, waitForMeta, waitForUrlChange, waitUntilEnabled };
3540
+ export { browse, createDateEngine, createFieldEngine, formatDate, formatDateForInput, formatHtml, fuzzyLocator, getMonthNames, realScrubHtml, screenshot, screenshotElement, scrollToCenter, scrubHtml, setFieldValue, snapshot, suppressInterferences, unifiedHtmlDiff, waitAfterInteraction, waitForAnimationsToFinish, waitForDomIdle, waitForIdle, waitForMeta, waitForUrlChange, waitUntilEnabled };
3446
3541
  //# sourceMappingURL=index.js.map
3447
3542
  //# sourceMappingURL=index.js.map