@letsrunit/playwright 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +17 -2
- package/dist/index.js +338 -254
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
- package/src/index.ts +1 -0
- package/src/scrub-html.ts +40 -0
- package/src/snapshot.ts +16 -2
package/dist/index.d.ts
CHANGED
|
@@ -63,7 +63,11 @@ interface PageInfo {
|
|
|
63
63
|
screenshot?: File;
|
|
64
64
|
}
|
|
65
65
|
|
|
66
|
-
|
|
66
|
+
/** Options accepted by `snapshot`. */
type SnapshotOptions = {
  /**
   * Strip utility-framework classes (Tailwind, Bootstrap, UnoCSS, Windi)
   * from the captured HTML.
   */
  dropUtilityClasses?: boolean;
};
|
|
70
|
+
/**
 * Capture the current state of a page: its URL, its HTML and a screenshot.
 *
 * @param page - Page to capture.
 * @param opts - Optional capture settings; see `SnapshotOptions`.
 * @returns A promise resolving to the captured snapshot.
 */
declare function snapshot(page: Page, opts?: SnapshotOptions): Promise<Snapshot>;
|
|
67
71
|
|
|
68
72
|
declare function screenshotElement(page: Page, selector: string, options?: LocatorScreenshotOptions): Promise<File>;
|
|
69
73
|
declare function screenshot(page: Page, options?: PageScreenshotOptions): Promise<File>;
|
|
@@ -123,6 +127,9 @@ type ScrubHtmlOptions = {
|
|
|
123
127
|
replaceBrInHeadings?: boolean;
|
|
124
128
|
/** Limit lists to max items: -1 means no limit. Default: -1 */
|
|
125
129
|
limitLists?: number;
|
|
130
|
+
/** Strip utility-framework classes (Tailwind, Bootstrap, UnoCSS, Windi) from class
|
|
131
|
+
* attributes. Removes the attribute entirely when all classes are stripped. Default: false */
|
|
132
|
+
dropUtilityClasses?: boolean;
|
|
126
133
|
};
|
|
127
134
|
declare function scrubHtml(page: {
|
|
128
135
|
html: string;
|
|
@@ -136,4 +143,12 @@ declare function realScrubHtml({ html, url }: {
|
|
|
136
143
|
url: string;
|
|
137
144
|
}, opts?: ScrubHtmlOptions): Promise<string>;
|
|
138
145
|
|
|
139
|
-
|
|
146
|
+
/**
 * Produce a unified diff between two HTML documents.
 *
 * Each side may be given either as a live `Page` or as a plain
 * `{ html, url }` pair.
 *
 * @param old - The earlier document.
 * @param current - The later document.
 * @returns A promise resolving to the unified diff text.
 */
declare function unifiedHtmlDiff(
  old: { html: string; url: string } | Page,
  current: { html: string; url: string } | Page,
): Promise<string>;
|
|
153
|
+
|
|
154
|
+
export { type PageInfo, type ScrubHtmlOptions, type Snapshot, type SnapshotOptions, browse, createDateEngine, createFieldEngine, formatDate, formatDateForInput, formatHtml, fuzzyLocator, getMonthNames, realScrubHtml, screenshot, screenshotElement, scrollToCenter, scrubHtml, setFieldValue, snapshot, suppressInterferences, unifiedHtmlDiff, waitAfterInteraction, waitForAnimationsToFinish, waitForDomIdle, waitForIdle, waitForMeta, waitForUrlChange, waitUntilEnabled };
|
package/dist/index.js
CHANGED
|
@@ -5,6 +5,7 @@ import rehypeStringify from 'rehype-stringify';
|
|
|
5
5
|
import { unified } from 'unified';
|
|
6
6
|
import stringify from 'fast-json-stable-stringify';
|
|
7
7
|
import { JSDOM } from 'jsdom';
|
|
8
|
+
import * as Diff from 'diff';
|
|
8
9
|
|
|
9
10
|
// src/browser.ts
|
|
10
11
|
async function browse(browser, options = {}) {
|
|
@@ -83,11 +84,11 @@ function formatDateForInput(date, type) {
|
|
|
83
84
|
return `${yyyy}-${mm}-${dd}`;
|
|
84
85
|
}
|
|
85
86
|
}
|
|
86
|
-
/**
 * Render a date using a simple token pattern.
 *
 * Recognised tokens are `DD` (zero-padded day of month), `MM` (zero-padded,
 * 1-based month) and `YYYY` (full year), all read through the local-time
 * getters of `d`. Any other text in the pattern is copied through unchanged.
 *
 * @param d - Date to format.
 * @param format2 - Pattern such as "DD-MM-YYYY".
 * @returns The pattern with every token occurrence substituted.
 */
function formatDate(d, format2) {
  const parts = {
    DD: String(d.getDate()).padStart(2, "0"),
    MM: String(d.getMonth() + 1).padStart(2, "0"),
    YYYY: String(d.getFullYear())
  };
  // One global pass: String.prototype.replace with a *string* pattern only
  // substitutes the first occurrence, so repeated tokens used to be left as-is.
  return format2.replace(/YYYY|MM|DD/g, (token) => parts[token]);
}
|
|
92
93
|
function getMonthNames(locale) {
|
|
93
94
|
const formatter = new Intl.DateTimeFormat(locale, { month: "long" });
|
|
@@ -1658,12 +1659,336 @@ async function screenshotWithMask(page, options) {
|
|
|
1658
1659
|
}
|
|
1659
1660
|
}
|
|
1660
1661
|
|
|
1662
|
+
// src/utils/type-check.ts
|
|
1663
|
+
/**
 * Duck-type check for a browser page object.
 *
 * A value counts as a page when it exposes `content`, `url` and `screenshot`
 * as functions. `null` and `undefined` are rejected instead of throwing, so
 * the guard is safe to call on any input.
 *
 * @param page - Value to inspect.
 * @returns `true` when all three methods are present.
 */
function isPage(page) {
  if (page == null) return false;
  return ["content", "url", "screenshot"].every((method) => typeof page[method] === "function");
}
|
|
1666
|
+
|
|
1667
|
+
// src/scrub-html.ts
|
|
1668
|
+
// Input-size cut-offs read by getDefaults(); each is compared against the
// length of the raw HTML string.
// At or above this length the aggressive attribute allow-list (level 2) is used.
var HTML_MIN_ATTR_THRESHOLD = 250000;
// At or above this length lists and table rows are capped at 20 entries.
var HTML_LIMIT_LISTS_THRESHOLD = 400000;
// At or above this length only the <main> element is kept.
var HTML_MAIN_ONLY_THRESHOLD = 600000;
|
|
1671
|
+
/**
 * Build the default scrub options for a document of the given size.
 *
 * Larger inputs get progressively more aggressive settings: a stricter
 * attribute allow-list, capped lists, and finally main-content-only.
 *
 * @param contentLength - Length of the raw HTML string.
 * @returns A fresh options object with every scrub option set.
 */
function getDefaults(contentLength) {
  const useAggressiveAttrs = contentLength >= HTML_MIN_ATTR_THRESHOLD;
  const capLists = contentLength >= HTML_LIMIT_LISTS_THRESHOLD;
  const mainOnly = contentLength >= HTML_MAIN_ONLY_THRESHOLD;
  return {
    dropHidden: true,
    dropHead: true,
    dropSvg: false,
    pickMain: mainOnly,
    stripAttributes: useAggressiveAttrs ? 2 : 1,
    normalizeWhitespace: true,
    dropComments: true,
    replaceBrInHeadings: true,
    limitLists: capLists ? 20 : -1,
    dropUtilityClasses: false
  };
}
|
|
1685
|
+
// Attribute allow-list for the standard stripping level (level 1).
// `match` holds exact lower-case names; `regexp` admits whole attribute families.
var ALLOWED_ATTRS = {
  match: /* @__PURE__ */ new Set([
    // identity/semantics
    "id",
    "class",
    "role",
    // internationalization
    "lang",
    "dir",
    // anchors & media
    "href",
    "title",
    "target",
    "rel",
    "src",
    "alt",
    "width",
    "height",
    "loading",
    // tables
    "scope",
    "headers",
    "colspan",
    "rowspan",
    // forms (pure semantics—doesn’t change structure)
    "name",
    "value",
    "type",
    "for",
    "placeholder",
    "checked",
    "selected",
    "multiple",
    "method",
    "action",
    // time, figure, etc.
    "datetime"
  ]),
  // ARIA attributes & data-* attributes.
  // The alternation is grouped so both anchors apply to both families; the
  // previous `^aria-…|^data-…$` form left the `aria-` branch without an end anchor.
  regexp: /^(?:aria|data)-[\w-]+$/i
};
|
|
1726
|
+
// Attribute allow-list for the aggressive stripping level: exact names only,
// no pattern-based families (`regexp` is null).
var ALLOWED_ATTRS_AGGRESSIVE = {
  match: /* @__PURE__ */ new Set([
    // structure / general selectors
    "id", "class", "role",
    // links / media
    "href", "src", "alt", "title",
    // tables
    "scope",
    // forms / fields
    "name", "type", "for", "placeholder", "value", "checked", "selected",
    // ARIA for Playwright getByRole/getByLabel
    "aria-label", "aria-labelledby", "aria-describedby",
    // commonly used test selectors
    "data-testid", "data-test-id", "data-cy", "data-qa"
  ]),
  regexp: null
};
|
|
1759
|
+
// Comma-joined CSS selector list for elements that mark themselves as hidden:
// three attribute markers plus three inline-style substring matches.
// The style entries are plain substring tests (`*=`), so they only hit
// declarations written without whitespace after the colon.
var HIDDEN_SELECTORS = [
  "[hidden]",
  "[inert]",
  '[aria-hidden="true"]',
  ...["display:none", "visibility:hidden", "opacity:0"].map((declaration) => `[style*="${declaration}"]`)
].join(",");
|
|
1767
|
+
// Tag names removed from every scrubbed document, regardless of options.
var ALWAYS_DROP = "script style template noscript slot object embed".split(" ");
|
|
1776
|
+
/**
 * Scrub the HTML of a page, going through the memoized scrubber.
 *
 * @param page - Either a live page (anything `isPage` accepts) or an
 *   `{ html, url }` pair.
 * @param opts - Scrub options, forwarded unchanged.
 * @returns The scrubbed HTML string.
 */
async function scrubHtml(page, opts = {}) {
  // A live page is reduced to its serialised content and URL first.
  const source = isPage(page) ? { html: await page.content(), url: page.url() } : page;
  return await memoizedScrubHtml(source, opts);
}
|
|
1780
|
+
// Memoized wrapper around realScrubHtml. The cache key is a stable JSON
// string of the input HTML, its URL and every option, so any change to one
// of them misses the cache.
// NOTE(review): `max` / `ttl` semantics come from the memoize helper, which is
// defined outside this view — presumably 16 entries and 10 minutes in ms; confirm.
var memoizedScrubHtml = memoize(realScrubHtml, {
  max: 16,
  ttl: 10 * 6e4,
  cacheKey: (args) => {
    const source = args[0];
    const options = args[1];
    return stringify({ html: source.html, url: source.url, ...options });
  }
});
|
|
1785
|
+
/**
 * Parse an HTML string and reduce it according to the scrub options.
 *
 * Options not supplied by the caller fall back to `getDefaults(html.length)`.
 * The steps run in a fixed order: pick main, drop infrastructure tags (always),
 * drop hidden trees, strip attributes, drop comments, replace `<br>` in
 * headings, limit lists, strip utility classes, normalize whitespace.
 *
 * NOTE(review): `dropHead` is part of the defaults but is not read here; the
 * result is always the serialised `<body>` content.
 *
 * @param source - Raw HTML and the URL it was loaded from.
 * @param opts - Partial scrub options overriding the size-based defaults.
 * @returns The inner HTML of the scrubbed `<body>`.
 */
async function realScrubHtml({ html, url }, opts = {}) {
  const o = { ...getDefaults(html.length), ...opts };
  const dom = new JSDOM(html, { url });
  try {
    const doc = dom.window.document;
    if (o.pickMain) pickMain(doc);
    dropInfraAndSvg(doc, !!o.dropSvg);
    if (o.dropHidden) dropHiddenTrees(doc);
    if (o.stripAttributes) stripAttributesAndSanitize(doc, o.stripAttributes);
    if (o.dropComments) dropHtmlComments(doc);
    if (o.replaceBrInHeadings) replaceBrsInHeadings(doc);
    if (o.limitLists >= 0) limitListsAndRows(doc, o.limitLists);
    if (o.dropUtilityClasses) stripUtilityClasses(doc);
    if (o.normalizeWhitespace) normalizeWhitespace(doc.body);
    return doc.body.innerHTML;
  } finally {
    // Release the jsdom window once the result string has been produced
    // (also on error), rather than leaving teardown to garbage collection.
    dom.window.close();
  }
}
|
|
1800
|
+
/**
 * Report whether any ancestor of `el` marks itself as hidden.
 *
 * An ancestor counts as hidden when it carries `hidden`, `inert` or
 * `aria-hidden="true"`, or when its inline style declares `display: none`,
 * `visibility: hidden` or a zero `opacity`. The element itself is not checked.
 *
 * @param el - Element whose ancestor chain is inspected.
 * @returns `true` as soon as one hidden ancestor is found.
 */
function hasHiddenAncestor(el) {
  // `(?<![\w-])` anchors on the property name so `fill-opacity` or
  // `backface-visibility` are not mistaken for `opacity` / `visibility`.
  // The opacity pattern accepts only zero values (0, 0.0, .0, 0%); the earlier
  // `0(?:\D|$)` form also matched `0.5` because "." is a non-digit.
  const hiddenStylePatterns = [
    /(?<![\w-])display\s*:\s*none\b/i,
    /(?<![\w-])visibility\s*:\s*hidden\b/i,
    /(?<![\w-])opacity\s*:\s*(?:0+(?:\.0*)?|\.0+)%?\s*(?:!\s*important\s*)?(?:;|$)/i
  ];
  for (let p = el.parentElement; p; p = p.parentElement) {
    if (p.hasAttribute("hidden") || p.hasAttribute("inert") || p.getAttribute("aria-hidden") === "true") return true;
    const style = p.getAttribute("style") || "";
    if (hiddenStylePatterns.some((pattern) => pattern.test(style))) return true;
  }
  return false;
}
|
|
1812
|
+
/**
 * Collapse runs of whitespace in every text node below `root`.
 *
 * - Text anywhere inside `<pre>`, `<code>`, `<samp>` or `<kbd>` is left
 *   untouched (the whole ancestor chain is checked, not only the direct
 *   parent, so highlighted `<pre><span>…` content keeps its layout).
 * - Text nodes whose content is already collapsed are left untouched.
 * - Inside block-like parents, the leading/trailing space is dropped only at a
 *   block boundary (no sibling, or a block-like sibling). A space next to an
 *   inline sibling is kept so words are not glued to `<b>`, `<a>`, etc.
 *
 * @param root - Node whose subtree is normalised in place.
 */
function normalizeWhitespace(root) {
  const blockish = /^(P|LI|DIV|SECTION|ARTICLE|ASIDE|HEADER|FOOTER|MAIN|NAV|H[1-6]|BLOCKQUOTE|FIGCAPTION|TD|TH)$/i;
  // A missing sibling or a block-like element sibling is a safe place to trim.
  const isBoundary = (sibling) => !sibling || (sibling.nodeType === 1 && blockish.test(sibling.tagName));
  const walker = root.ownerDocument.createTreeWalker(root, 4 /* NodeFilter.SHOW_TEXT */);
  let node;
  while ((node = walker.nextNode())) {
    const parent = node.parentElement;
    if (!parent) continue;
    if (parent.closest("pre, code, samp, kbd")) continue;
    const original = node.nodeValue ?? "";
    let text = original.replace(/\s+/g, " ");
    if (text === original) continue;
    if (blockish.test(parent.tagName)) {
      if (isBoundary(node.previousSibling)) text = text.replace(/^ /, "");
      if (isBoundary(node.nextSibling)) text = text.replace(/ $/, "");
    }
    node.nodeValue = text;
  }
}
|
|
1838
|
+
/**
 * Reduce the document body to a copy of its first `<main>` element.
 *
 * @param doc - Document to modify in place.
 * @returns `true` when a `<main>` was found and the body was replaced,
 *   `false` when the document was left untouched.
 */
function pickMain(doc) {
  const mainEl = doc.querySelector("main");
  if (mainEl) {
    // Clone before clearing: emptying the body detaches the original.
    const copy = mainEl.cloneNode(true);
    doc.body.innerHTML = "";
    doc.body.appendChild(copy);
  }
  return Boolean(mainEl);
}
|
|
1846
|
+
/**
 * Remove every element whose tag is listed in ALWAYS_DROP, plus `<svg>`
 * elements when requested.
 *
 * @param doc - Document to modify in place.
 * @param dropSvg - When truthy, `<svg>` elements are removed as well.
 */
function dropInfraAndSvg(doc, dropSvg) {
  const tags = dropSvg ? [...ALWAYS_DROP, "svg"] : [...ALWAYS_DROP];
  const selector = tags.filter(Boolean).join(",");
  // An empty selector would make querySelectorAll throw.
  if (selector === "") return;
  for (const el of doc.querySelectorAll(selector)) el.remove();
}
|
|
1851
|
+
/**
 * Remove hidden elements (and therefore their subtrees) from the document.
 *
 * Pass 1 uses HIDDEN_SELECTORS as a cheap pre-filter and confirms each hit
 * with `isHiddenByOwnMarkup`, because the `[style*=…]` substring selectors
 * also match `opacity:0.5` or `backface-visibility:hidden`.
 * Pass 2 walks the body and removes elements that are hidden themselves
 * (catching inline styles written with whitespace, which the selectors miss)
 * or that sit below a hidden ancestor.
 *
 * `<html>` and `<body>` are never removed in pass 1, so `doc.body` stays
 * available; their hidden state is seen in pass 2 through the ancestor check.
 *
 * @param doc - Document to modify in place.
 */
function dropHiddenTrees(doc) {
  doc.querySelectorAll(HIDDEN_SELECTORS).forEach((el) => {
    if (el === doc.body || el === doc.documentElement) return;
    if (isHiddenByOwnMarkup(el)) el.remove();
  });
  for (const el of [...doc.body.querySelectorAll("*")]) {
    // Already gone together with a removed ancestor.
    if (!el.isConnected) continue;
    if (isHiddenByOwnMarkup(el) || hasHiddenAncestor(el)) el.remove();
  }
}
// True when the element's own attributes or inline style declare it hidden.
function isHiddenByOwnMarkup(el) {
  if (el.hasAttribute("hidden") || el.hasAttribute("inert") || el.getAttribute("aria-hidden") === "true") return true;
  const style = el.getAttribute("style") || "";
  return /(?<![\w-])display\s*:\s*none\b/i.test(style)
    || /(?<![\w-])visibility\s*:\s*hidden\b/i.test(style)
    || /(?<![\w-])opacity\s*:\s*(?:0+(?:\.0*)?|\.0+)%?\s*(?:!\s*important\s*)?(?:;|$)/i.test(style);
}
|
|
1859
|
+
/**
 * Strip attributes that are not on the allow-list and neutralise
 * `javascript:` URLs.
 *
 * - Event handlers (`on*`) and `style` are removed from every element.
 * - On non-SVG elements, any attribute not admitted by the allow-list for
 *   `level` is removed; SVG elements keep their remaining attributes.
 * - `href`, `src` and `action` values starting with `javascript:` are
 *   removed on any element (previously only `a[href]` was covered, although
 *   `src` and `action` are allow-listed as well).
 *
 * @param doc - Document to modify in place.
 * @param level - Falsy: do nothing. `1`: ALLOWED_ATTRS. Any other truthy
 *   value: ALLOWED_ATTRS_AGGRESSIVE.
 */
function stripAttributesAndSanitize(doc, level) {
  if (!level) return;
  // The allow-list depends only on `level`; resolve it once.
  const allowed = level === 1 ? ALLOWED_ATTRS : ALLOWED_ATTRS_AGGRESSIVE;
  for (const el of doc.body.querySelectorAll("*")) {
    const isSvg = el.namespaceURI === "http://www.w3.org/2000/svg";
    // Snapshot the attribute list: it is live and we remove while iterating.
    for (const { name } of [...el.attributes]) {
      const lower = name.toLowerCase();
      const alwaysDropped = lower.startsWith("on") || lower === "style";
      const notAllowed = !isSvg && !allowed.match.has(lower) && !allowed.regexp?.test(name);
      if (alwaysDropped || notAllowed) el.removeAttribute(name);
    }
  }
  for (const urlAttr of ["href", "src", "action"]) {
    doc.querySelectorAll(`[${urlAttr}]`).forEach((el) => {
      const value = el.getAttribute(urlAttr) || "";
      if (/^\s*javascript:/i.test(value)) el.removeAttribute(urlAttr);
    });
  }
}
|
|
1886
|
+
/**
 * Remove every comment node from the document.
 *
 * @param doc - Document to modify in place.
 */
function dropHtmlComments(doc) {
  // Fall back to the numeric value of NodeFilter.SHOW_COMMENT when the
  // document has no window to read the constant from.
  const showComment = doc.defaultView?.NodeFilter?.SHOW_COMMENT ?? 128;
  const walker = doc.createTreeWalker(doc, showComment);
  // Collect first, remove afterwards, so the walker never sits on a detached node.
  const comments = [];
  for (let current = walker.nextNode(); current; current = walker.nextNode()) {
    comments.push(current);
  }
  for (const comment of comments) {
    comment.parentNode?.removeChild(comment);
  }
}
|
|
1895
|
+
/**
 * Replace each `<br>` inside a heading (`h1`–`h6`) with a single space
 * text node.
 *
 * @param doc - Document to modify in place.
 */
function replaceBrsInHeadings(doc) {
  for (const heading of doc.querySelectorAll("h1, h2, h3, h4, h5, h6")) {
    for (const lineBreak of heading.querySelectorAll("br")) {
      lineBreak.replaceWith(doc.createTextNode(" "));
    }
  }
}
|
|
1903
|
+
// Heuristics used to recognise utility-framework class names.

// Any class token containing ":" is treated as a variant-prefixed utility.
var UTILITY_VARIANT_RE = /:/;
// Optional leading "-", then one of the known utility stems, then "-".
var UTILITY_PREFIX_RE = /^-?(?:p[xytblrse]?|m[xytblrse]?|gap|space-[xy]|w|h|min-w|min-h|max-w|max-h|size|basis|inset|top|right|bottom|left|start|end|z|text|bg|border|ring|shadow|outline|fill|stroke|divide|accent|caret|from|via|to|decoration|font|leading|tracking|indent|line-clamp|columns|aspect|object|opacity|rotate|scale|translate|skew|transition|duration|ease|delay|animate|rounded|overflow|overscroll|scroll|snap|touch|cursor|pointer-events|select|resize|flex|grid|col|row|order|auto-cols|auto-rows|items|justify|content|self|place|float|clear|list|whitespace|break|hyphens|mix-blend|bg-blend|backdrop|d|g|fs|fw|lh|align|position)-/i;
// Utility class names that carry no "-value" suffix and must match exactly.
var UTILITY_STANDALONE = /* @__PURE__ */ new Set([
  // display
  "flex", "grid", "block", "hidden", "inline", "inline-block", "inline-flex", "inline-grid",
  "contents", "flow-root", "list-item", "table",
  // layout helpers
  "container", "truncate", "grow", "shrink",
  // positioning
  "static", "relative", "absolute", "fixed", "sticky",
  // visibility / stacking
  "visible", "invisible", "collapse", "isolate",
  // text decoration
  "underline", "overline", "line-through", "no-underline",
  // text transform
  "uppercase", "lowercase", "capitalize", "normal-case",
  // font style / smoothing
  "italic", "not-italic", "antialiased", "subpixel-antialiased",
  // accessibility
  "sr-only", "not-sr-only",
  // float clearing and grid rows/columns
  "clearfix", "row", "col"
]);
|
|
1949
|
+
/**
 * Decide whether a single class token looks like a utility-framework class.
 *
 * A token qualifies when it contains a variant separator, when it (minus one
 * leading "-") is a known standalone utility, or when it starts with a known
 * utility stem.
 *
 * @param token - One class name, without surrounding whitespace.
 * @returns `true` when the token should be stripped.
 */
function isUtilityClass(token) {
  const unsigned = token.startsWith("-") ? token.slice(1) : token;
  return UTILITY_VARIANT_RE.test(token) || UTILITY_STANDALONE.has(unsigned) || UTILITY_PREFIX_RE.test(token);
}
|
|
1955
|
+
/**
 * Remove utility-framework classes from every `class` attribute in the body.
 *
 * The attribute is rewritten with the remaining classes joined by single
 * spaces, or removed entirely when nothing remains.
 *
 * The attribute is read and written through get/setAttribute rather than
 * `className`: on SVG elements `className` is an SVGAnimatedString, not a
 * string, so calling `.split` on it throws.
 *
 * @param doc - Document to modify in place.
 */
function stripUtilityClasses(doc) {
  for (const el of doc.body.querySelectorAll("[class]")) {
    const raw = el.getAttribute("class") ?? "";
    const kept = raw.split(/\s+/).filter((token) => token && !isUtilityClass(token));
    if (kept.length === 0) el.removeAttribute("class");
    else el.setAttribute("class", kept.join(" "));
  }
}
|
|
1962
|
+
/**
 * Cap the number of direct `<li>` children of every list and the number of
 * direct `<tr>` children of every table / table section.
 *
 * Only direct children with the matching tag are counted and removed; other
 * children are left alone. A negative or non-finite `limit` means "no limit"
 * and is a no-op (previously it indexed past the array and threw); a
 * fractional limit is rounded down.
 *
 * @param doc - Document to modify in place.
 * @param limit - Maximum number of items/rows to keep per container.
 */
function limitListsAndRows(doc, limit) {
  if (!Number.isFinite(limit) || limit < 0) return;
  const keep = Math.floor(limit);
  const truncate = (container, tagName) => {
    const matching = Array.from(container.children).filter((child) => child.tagName === tagName);
    for (const extra of matching.slice(keep)) extra.remove();
  };
  doc.querySelectorAll("ul, ol").forEach((list) => truncate(list, "LI"));
  doc.querySelectorAll("table, thead, tbody, tfoot").forEach((section) => truncate(section, "TR"));
}
|
|
1973
|
+
|
|
1661
1974
|
// src/snapshot.ts
|
|
1662
|
-
/**
 * Capture the page's URL, HTML and screenshot once the DOM has settled.
 *
 * Waits 500 ms, then for DOM idle, then gathers all three in parallel.
 * With `opts.dropUtilityClasses` the HTML is additionally passed through
 * `realScrubHtml` with every other optional step switched off.
 *
 * NOTE(review): `realScrubHtml` returns only the body's inner HTML and always
 * removes its always-dropped tags, so the HTML differs from the
 * unprocessed capture in more than its class attributes — confirm intended.
 *
 * @param page - Page to capture.
 * @param opts - Capture options; only `dropUtilityClasses` is read.
 * @returns `{ url, html, screenshot }`.
 */
async function snapshot(page, opts = {}) {
  await sleep(500);
  await waitForDomIdle(page);
  const [url, html, file] = await Promise.all([page.url(), getContentWithMarkedHidden(page), screenshot(page)]);
  if (!opts.dropUtilityClasses) {
    return { url, html, screenshot: file };
  }
  // Everything off except utility-class stripping.
  const classStrippingOnly = {
    dropHidden: false,
    dropHead: false,
    dropSvg: false,
    pickMain: false,
    stripAttributes: 0,
    normalizeWhitespace: false,
    dropComments: false,
    replaceBrInHeadings: false,
    limitLists: -1,
    dropUtilityClasses: true
  };
  const finalHtml = await realScrubHtml({ html, url }, classStrippingOnly);
  return { url, html: finalHtml, screenshot: file };
}
|
|
1668
1993
|
async function getContentWithMarkedHidden(page) {
|
|
1669
1994
|
try {
|
|
@@ -3190,258 +3515,17 @@ async function suppressInterferences(page, opts = {}) {
|
|
|
3190
3515
|
await sleep(pollIntervalMs);
|
|
3191
3516
|
}
|
|
3192
3517
|
}
|
|
3193
|
-
|
|
3194
|
-
|
|
3195
|
-
|
|
3196
|
-
return typeof page.content === "function" && typeof page.url === "function" && typeof page.screenshot === "function";
|
|
3197
|
-
}
|
|
3198
|
-
|
|
3199
|
-
// src/scrub-html.ts
|
|
3200
|
-
var HTML_MIN_ATTR_THRESHOLD = 25e4;
|
|
3201
|
-
var HTML_LIMIT_LISTS_THRESHOLD = 4e5;
|
|
3202
|
-
var HTML_MAIN_ONLY_THRESHOLD = 6e5;
|
|
3203
|
-
function getDefaults(contentLength) {
|
|
3204
|
-
return {
|
|
3205
|
-
dropHidden: true,
|
|
3206
|
-
dropHead: true,
|
|
3207
|
-
dropSvg: false,
|
|
3208
|
-
pickMain: contentLength >= HTML_MAIN_ONLY_THRESHOLD,
|
|
3209
|
-
stripAttributes: contentLength >= HTML_MIN_ATTR_THRESHOLD ? 2 : 1,
|
|
3210
|
-
normalizeWhitespace: true,
|
|
3211
|
-
dropComments: true,
|
|
3212
|
-
replaceBrInHeadings: true,
|
|
3213
|
-
limitLists: contentLength >= HTML_LIMIT_LISTS_THRESHOLD ? 20 : -1
|
|
3214
|
-
};
|
|
3215
|
-
}
|
|
3216
|
-
var ALLOWED_ATTRS = {
|
|
3217
|
-
match: /* @__PURE__ */ new Set([
|
|
3218
|
-
// identity/semantics
|
|
3219
|
-
"id",
|
|
3220
|
-
"class",
|
|
3221
|
-
"role",
|
|
3222
|
-
// internationalization
|
|
3223
|
-
"lang",
|
|
3224
|
-
"dir",
|
|
3225
|
-
// anchors & media
|
|
3226
|
-
"href",
|
|
3227
|
-
"title",
|
|
3228
|
-
"target",
|
|
3229
|
-
"rel",
|
|
3230
|
-
"src",
|
|
3231
|
-
"alt",
|
|
3232
|
-
"width",
|
|
3233
|
-
"height",
|
|
3234
|
-
"loading",
|
|
3235
|
-
// tables
|
|
3236
|
-
"scope",
|
|
3237
|
-
"headers",
|
|
3238
|
-
"colspan",
|
|
3239
|
-
"rowspan",
|
|
3240
|
-
// forms (pure semantics—doesn’t change structure)
|
|
3241
|
-
"name",
|
|
3242
|
-
"value",
|
|
3243
|
-
"type",
|
|
3244
|
-
"for",
|
|
3245
|
-
"placeholder",
|
|
3246
|
-
"checked",
|
|
3247
|
-
"selected",
|
|
3248
|
-
"multiple",
|
|
3249
|
-
"method",
|
|
3250
|
-
"action",
|
|
3251
|
-
// time, figure, etc.
|
|
3252
|
-
"datetime"
|
|
3253
|
-
]),
|
|
3254
|
-
regexp: /^aria-[\w-]+|^data-[\w-]+$/i
|
|
3255
|
-
// ARIA attributes & data-* attributes
|
|
3256
|
-
};
|
|
3257
|
-
var ALLOWED_ATTRS_AGGRESSIVE = {
|
|
3258
|
-
match: /* @__PURE__ */ new Set([
|
|
3259
|
-
// structuur / algemene selectors
|
|
3260
|
-
"id",
|
|
3261
|
-
"class",
|
|
3262
|
-
"role",
|
|
3263
|
-
// links / media
|
|
3264
|
-
"href",
|
|
3265
|
-
"src",
|
|
3266
|
-
"alt",
|
|
3267
|
-
"title",
|
|
3268
|
-
// tables
|
|
3269
|
-
"scope",
|
|
3270
|
-
// forms / velden
|
|
3271
|
-
"name",
|
|
3272
|
-
"type",
|
|
3273
|
-
"for",
|
|
3274
|
-
"placeholder",
|
|
3275
|
-
"value",
|
|
3276
|
-
"checked",
|
|
3277
|
-
"selected",
|
|
3278
|
-
// ARIA voor Playwright getByRole/getByLabel
|
|
3279
|
-
"aria-label",
|
|
3280
|
-
"aria-labelledby",
|
|
3281
|
-
"aria-describedby",
|
|
3282
|
-
// veelgebruikte test selectors
|
|
3283
|
-
"data-testid",
|
|
3284
|
-
"data-test-id",
|
|
3285
|
-
"data-cy",
|
|
3286
|
-
"data-qa"
|
|
3287
|
-
]),
|
|
3288
|
-
regexp: null
|
|
3289
|
-
};
|
|
3290
|
-
var HIDDEN_SELECTORS = [
|
|
3291
|
-
"[hidden]",
|
|
3292
|
-
"[inert]",
|
|
3293
|
-
'[aria-hidden="true"]',
|
|
3294
|
-
'[style*="display:none"]',
|
|
3295
|
-
'[style*="visibility:hidden"]',
|
|
3296
|
-
'[style*="opacity:0"]'
|
|
3297
|
-
].join(",");
|
|
3298
|
-
var ALWAYS_DROP = [
|
|
3299
|
-
"script",
|
|
3300
|
-
"style",
|
|
3301
|
-
"template",
|
|
3302
|
-
"noscript",
|
|
3303
|
-
"slot",
|
|
3304
|
-
"object",
|
|
3305
|
-
"embed"
|
|
3306
|
-
];
|
|
3307
|
-
async function scrubHtml(page, opts = {}) {
|
|
3308
|
-
if (isPage(page)) page = { html: await page.content(), url: page.url() };
|
|
3309
|
-
return await memoizedScrubHtml(page, opts);
|
|
3310
|
-
}
|
|
3311
|
-
var memoizedScrubHtml = memoize(realScrubHtml, {
|
|
3312
|
-
max: 16,
|
|
3313
|
-
ttl: 10 * 6e4,
|
|
3314
|
-
cacheKey: (args) => stringify({ html: args[0].html, url: args[0].url, ...args[1] })
|
|
3315
|
-
});
|
|
3316
|
-
async function realScrubHtml({ html, url }, opts = {}) {
|
|
3317
|
-
const o = { ...getDefaults(html.length), ...opts };
|
|
3318
|
-
const dom = new JSDOM(html, { url });
|
|
3319
|
-
const doc = dom.window.document;
|
|
3320
|
-
if (o.pickMain) pickMain(doc);
|
|
3321
|
-
dropInfraAndSvg(doc, !!o.dropSvg);
|
|
3322
|
-
if (o.dropHidden) dropHiddenTrees(doc);
|
|
3323
|
-
if (o.stripAttributes) stripAttributesAndSanitize(doc, o.stripAttributes);
|
|
3324
|
-
if (o.dropComments) dropHtmlComments(doc);
|
|
3325
|
-
if (o.replaceBrInHeadings) replaceBrsInHeadings(doc);
|
|
3326
|
-
if (o.limitLists >= 0) limitListsAndRows(doc, o.limitLists);
|
|
3327
|
-
if (o.normalizeWhitespace) normalizeWhitespace(doc.body);
|
|
3328
|
-
return doc.body.innerHTML;
|
|
3329
|
-
}
|
|
3330
|
-
function hasHiddenAncestor(el) {
|
|
3331
|
-
let p = el.parentElement;
|
|
3332
|
-
while (p) {
|
|
3333
|
-
if (p.hasAttribute("hidden") || p.hasAttribute("inert") || p.getAttribute("aria-hidden") === "true") return true;
|
|
3334
|
-
const style = p.getAttribute("style") || "";
|
|
3335
|
-
if (/\bdisplay\s*:\s*none\b/i.test(style)) return true;
|
|
3336
|
-
if (/\bvisibility\s*:\s*hidden\b/i.test(style)) return true;
|
|
3337
|
-
if (/\bopacity\s*:\s*0(?:\D|$)/i.test(style)) return true;
|
|
3338
|
-
p = p.parentElement;
|
|
3339
|
-
}
|
|
3340
|
-
return false;
|
|
3341
|
-
}
|
|
3342
|
-
function normalizeWhitespace(root) {
|
|
3343
|
-
const preLike = /* @__PURE__ */ new Set(["PRE", "CODE", "SAMP", "KBD"]);
|
|
3344
|
-
const doc = root.ownerDocument;
|
|
3345
|
-
const walker = doc.createTreeWalker(
|
|
3346
|
-
root,
|
|
3347
|
-
4
|
|
3348
|
-
/*NodeFilter.SHOW_TEXT*/
|
|
3349
|
-
);
|
|
3350
|
-
const changes = [];
|
|
3351
|
-
let node;
|
|
3352
|
-
while (node = walker.nextNode()) {
|
|
3353
|
-
const text = node;
|
|
3354
|
-
const parent = text.parentElement;
|
|
3355
|
-
if (!parent) continue;
|
|
3356
|
-
if (preLike.has(parent.tagName)) continue;
|
|
3357
|
-
const v = text.nodeValue ?? "";
|
|
3358
|
-
const collapsed = v.replace(/\s+/g, " ");
|
|
3359
|
-
if (collapsed !== v) changes.push(text);
|
|
3360
|
-
}
|
|
3361
|
-
for (const t of changes) {
|
|
3362
|
-
const parent = t.parentElement;
|
|
3363
|
-
const isBlockish = /^(P|LI|DIV|SECTION|ARTICLE|ASIDE|HEADER|FOOTER|MAIN|NAV|H[1-6]|BLOCKQUOTE|FIGCAPTION|TD|TH)$/i.test(parent.tagName);
|
|
3364
|
-
t.nodeValue = (t.nodeValue || "").replace(/\s+/g, " ");
|
|
3365
|
-
if (isBlockish) t.nodeValue = (t.nodeValue || "").trim();
|
|
3366
|
-
}
|
|
3367
|
-
}
|
|
3368
|
-
function pickMain(doc) {
|
|
3369
|
-
const main = doc.querySelector("main");
|
|
3370
|
-
if (!main) return false;
|
|
3371
|
-
const clone = main.cloneNode(true);
|
|
3372
|
-
doc.body.innerHTML = "";
|
|
3373
|
-
doc.body.appendChild(clone);
|
|
3374
|
-
return true;
|
|
3375
|
-
}
|
|
3376
|
-
function dropInfraAndSvg(doc, dropSvg) {
|
|
3377
|
-
const toDrop = [...ALWAYS_DROP, dropSvg ? "svg" : ""].filter(Boolean).join(",");
|
|
3378
|
-
if (!toDrop) return;
|
|
3379
|
-
doc.querySelectorAll(toDrop).forEach((el) => el.remove());
|
|
3518
|
+
/**
 * Scrub raw HTML with the default options, then pretty-print it.
 *
 * @param rawHtml - Unprocessed HTML string.
 * @param url - URL the HTML belongs to.
 * @returns The scrubbed HTML after `formatHtml`.
 */
async function format(rawHtml, url) {
  const scrubbed = await scrubHtml({ html: rawHtml, url });
  const pretty = await formatHtml(scrubbed);
  return pretty;
}
|
|
3381
|
-
function
|
|
3382
|
-
|
|
3383
|
-
|
|
3384
|
-
|
|
3385
|
-
|
|
3386
|
-
if (hasHiddenAncestor(el)) el.remove();
|
|
3387
|
-
}
|
|
3388
|
-
}
|
|
3389
|
-
function stripAttributesAndSanitize(doc, level) {
|
|
3390
|
-
if (!level) return;
|
|
3391
|
-
const all = [...doc.body.querySelectorAll("*")];
|
|
3392
|
-
for (const el of all) {
|
|
3393
|
-
const isSvg = el.namespaceURI === "http://www.w3.org/2000/svg";
|
|
3394
|
-
for (const { name } of [...el.attributes]) {
|
|
3395
|
-
const lower = name.toLowerCase();
|
|
3396
|
-
if (lower.startsWith("on")) {
|
|
3397
|
-
el.removeAttribute(name);
|
|
3398
|
-
continue;
|
|
3399
|
-
}
|
|
3400
|
-
if (lower === "style") {
|
|
3401
|
-
el.removeAttribute(name);
|
|
3402
|
-
continue;
|
|
3403
|
-
}
|
|
3404
|
-
if (isSvg) continue;
|
|
3405
|
-
const allowed = level === 1 ? ALLOWED_ATTRS : ALLOWED_ATTRS_AGGRESSIVE;
|
|
3406
|
-
if (!allowed.match.has(lower) && !allowed.regexp?.test(name)) {
|
|
3407
|
-
el.removeAttribute(name);
|
|
3408
|
-
}
|
|
3409
|
-
}
|
|
3410
|
-
}
|
|
3411
|
-
doc.querySelectorAll("a[href]").forEach((a) => {
|
|
3412
|
-
const href = a.getAttribute("href") || "";
|
|
3413
|
-
if (/^\s*javascript:/i.test(href)) a.removeAttribute("href");
|
|
3414
|
-
});
|
|
3415
|
-
}
|
|
3416
|
-
function dropHtmlComments(doc) {
|
|
3417
|
-
const nf = doc.defaultView?.NodeFilter;
|
|
3418
|
-
const SHOW_COMMENT = nf?.SHOW_COMMENT ?? 128;
|
|
3419
|
-
const walker = doc.createTreeWalker(doc, SHOW_COMMENT);
|
|
3420
|
-
const toRemove = [];
|
|
3421
|
-
let n;
|
|
3422
|
-
while (n = walker.nextNode()) toRemove.push(n);
|
|
3423
|
-
toRemove.forEach((c) => c.parentNode?.removeChild(c));
|
|
3424
|
-
}
|
|
3425
|
-
function replaceBrsInHeadings(doc) {
|
|
3426
|
-
doc.querySelectorAll("h1, h2, h3, h4, h5, h6").forEach((h) => {
|
|
3427
|
-
h.querySelectorAll("br").forEach((br) => {
|
|
3428
|
-
const space = doc.createTextNode(" ");
|
|
3429
|
-
br.replaceWith(space);
|
|
3430
|
-
});
|
|
3431
|
-
});
|
|
3432
|
-
}
|
|
3433
|
-
function limitListsAndRows(doc, limit) {
|
|
3434
|
-
doc.querySelectorAll("ul, ol").forEach((list) => {
|
|
3435
|
-
const items = Array.from(list.children).filter((c) => c.tagName === "LI");
|
|
3436
|
-
for (let i = limit; i < items.length; i++) items[i].remove();
|
|
3437
|
-
});
|
|
3438
|
-
const rowContainers = doc.querySelectorAll("table, thead, tbody, tfoot");
|
|
3439
|
-
rowContainers.forEach((container) => {
|
|
3440
|
-
const rows = Array.from(container.children).filter((c) => c.tagName === "TR");
|
|
3441
|
-
for (let i = limit; i < rows.length; i++) rows[i].remove();
|
|
3442
|
-
});
|
|
3522
|
+
/**
 * Build a unified diff between two HTML documents.
 *
 * Each argument may be a live page (anything `isPage` accepts) or an
 * `{ html, url }` pair. Both sides are scrubbed and formatted before being
 * compared, and the patch labels them "before.html" and "after.html".
 *
 * @param old - The earlier document.
 * @param current - The later document.
 * @returns The unified diff text.
 */
async function unifiedHtmlDiff(old, current) {
  // Resolve live pages to plain sources, `old` first, as before.
  const before = isPage(old) ? { html: await old.content(), url: old.url() } : old;
  const after = isPage(current) ? { html: await current.content(), url: current.url() } : current;
  const [a, b] = await Promise.all([format(before.html, before.url), format(after.html, after.url)]);
  return Diff.createTwoFilesPatch("before.html", "after.html", a, b);
}
|
|
3444
3528
|
|
|
3445
|
-
export { browse, createDateEngine, createFieldEngine, formatDate, formatDateForInput, formatHtml, fuzzyLocator, getMonthNames, realScrubHtml, screenshot, screenshotElement, scrollToCenter, scrubHtml, setFieldValue, snapshot, suppressInterferences, waitAfterInteraction, waitForAnimationsToFinish, waitForDomIdle, waitForIdle, waitForMeta, waitForUrlChange, waitUntilEnabled };
|
|
3529
|
+
export { browse, createDateEngine, createFieldEngine, formatDate, formatDateForInput, formatHtml, fuzzyLocator, getMonthNames, realScrubHtml, screenshot, screenshotElement, scrollToCenter, scrubHtml, setFieldValue, snapshot, suppressInterferences, unifiedHtmlDiff, waitAfterInteraction, waitForAnimationsToFinish, waitForDomIdle, waitForIdle, waitForMeta, waitForUrlChange, waitUntilEnabled };
|
|
3446
3530
|
//# sourceMappingURL=index.js.map
|
|
3447
3531
|
//# sourceMappingURL=index.js.map
|