playwriter 0.0.42 → 0.0.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/clean-html.d.ts +11 -0
- package/dist/clean-html.d.ts.map +1 -0
- package/dist/clean-html.js +115 -0
- package/dist/clean-html.js.map +1 -0
- package/dist/htmlrewrite.d.ts +8 -0
- package/dist/htmlrewrite.d.ts.map +1 -0
- package/dist/htmlrewrite.js +221 -0
- package/dist/htmlrewrite.js.map +1 -0
- package/dist/htmlrewrite.test.d.ts +2 -0
- package/dist/htmlrewrite.test.d.ts.map +1 -0
- package/dist/htmlrewrite.test.js +13987 -0
- package/dist/htmlrewrite.test.js.map +1 -0
- package/dist/mcp.d.ts.map +1 -1
- package/dist/mcp.js +61 -18
- package/dist/mcp.js.map +1 -1
- package/dist/mcp.test.js +82 -1
- package/dist/mcp.test.js.map +1 -1
- package/package.json +3 -1
- package/src/__snapshots__/x.com.processed.html +1294 -0
- package/src/__snapshots__/x.com.processed.withStyles.html +4697 -0
- package/src/assets/aria-labels-github.png +0 -0
- package/src/assets/aria-labels-google-snapshot.txt +1 -1
- package/src/assets/aria-labels-hacker-news-snapshot.txt +690 -687
- package/src/assets/aria-labels-hacker-news.png +0 -0
- package/src/assets/x.com.html +32946 -0
- package/src/clean-html.ts +154 -0
- package/src/htmlrewrite.test.ts +14014 -0
- package/src/htmlrewrite.ts +255 -0
- package/src/mcp.test.ts +92 -1
- package/src/mcp.ts +67 -20
- package/src/prompt.md +36 -3
- package/src/snapshots/shadcn-ui-accessibility.md +2 -2
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { Page, Locator } from 'playwright-core';
|
|
2
|
+
/**
 * Options accepted by `getCleanHTML`.
 */
export interface GetCleanHTMLOptions {
    /** Page or locator whose HTML should be captured and cleaned. */
    locator: Page | Locator;
    /**
     * Substring or regular expression matched against each line of the cleaned
     * HTML; when provided, only matching lines (with surrounding context) are returned.
     */
    search?: RegExp | string;
    /** When true, return a diff against the previously stored snapshot instead of the HTML. */
    showDiffSinceLastCall?: boolean;
    /** When true, `style` and `class` attributes are kept in the cleaned output. */
    includeStyles?: boolean;
    /** Maximum length of an attribute value before it is truncated. */
    maxAttrLen?: number;
    /** Maximum length of a text node before it is truncated. */
    maxContentLen?: number;
}
|
|
10
|
+
/**
 * Captures the HTML of a page or locator, cleans it for prompt use and resolves
 * to a string: the cleaned HTML, the lines matching `search` with context, or a
 * diff / status message when `showDiffSinceLastCall` is set.
 */
export declare function getCleanHTML(options: GetCleanHTMLOptions): Promise<string>;
|
|
11
|
+
//# sourceMappingURL=clean-html.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"clean-html.d.ts","sourceRoot":"","sources":["../src/clean-html.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAA;AAI/C,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAA;IACvB,MAAM,CAAC,EAAE,MAAM,GAAG,MAAM,CAAA;IACxB,qBAAqB,CAAC,EAAE,OAAO,CAAA;IAC/B,aAAa,CAAC,EAAE,OAAO,CAAA;IACvB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,aAAa,CAAC,EAAE,MAAM,CAAA;CACvB;AAuBD,wBAAsB,YAAY,CAAC,OAAO,EAAE,mBAAmB,GAAG,OAAO,CAAC,MAAM,CAAC,CAuHhF"}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
import { createPatch } from 'diff';
|
|
2
|
+
import { formatHtmlForPrompt } from './htmlrewrite.js';
|
|
3
|
+
// Store last HTML snapshots per locator/page for diffing
|
|
4
|
+
// Weakly keyed by the Page object; each value is a Map from a snapshot key
// (as produced by getSnapshotKey) to the last cleaned HTML string for that key.
const lastHtmlSnapshots = new WeakMap();
|
|
5
|
+
/**
 * Duck-typed check for a Playwright-like page: any truthy value exposing both
 * `content()` and `goto()` methods.
 *
 * Always returns a real boolean; falsy inputs (null, undefined, 0, '') yield
 * `false` instead of being passed through unchanged.
 *
 * @param {*} obj - Value to inspect.
 * @returns {boolean} True when `obj` has `content` and `goto` functions.
 */
function isPage(obj) {
    return Boolean(obj) && typeof obj.content === 'function' && typeof obj.goto === 'function';
}
|
|
8
|
+
/**
 * Duck-typed RegExp check: a non-null object that exposes both `test` and
 * `exec` as functions.
 *
 * @param {*} value - Value to inspect.
 * @returns {boolean} True when `value` looks like a regular expression.
 */
function isRegExp(value) {
    if (value === null || typeof value !== 'object') {
        return false;
    }
    return ['test', 'exec'].every((method) => typeof value[method] === 'function');
}
|
|
11
|
+
/**
 * Builds the key under which a snapshot is stored in the per-page snapshot map.
 *
 * @param {*} locator - Page or locator passed to getCleanHTML.
 * @returns {string} '__page__' for a page; otherwise the locator's `_selector`
 *   when truthy, falling back to '__locator__'.
 */
function getSnapshotKey(locator) {
    // NOTE(review): `_selector` is an underscore-prefixed (non-public) field;
    // presumably the locator's selector string — confirm against Playwright.
    return isPage(locator) ? '__page__' : locator._selector || '__locator__';
}
|
|
18
|
+
/**
 * Captures the HTML of a page or locator, cleans it with formatHtmlForPrompt
 * and returns it as a string.
 *
 * Every call stores the cleaned HTML as the latest snapshot for the target so
 * that a later call can diff against it.
 *
 * @param {object} options
 * @param {*} options.locator - Page (read via `content()`) or locator (read via `innerHTML()`).
 * @param {string|RegExp} [options.search] - When truthy, only lines matching it are
 *   returned (at most 10 matches, each with 5 lines of context; non-contiguous
 *   sections are separated by a `---` line).
 * @param {boolean} [options.showDiffSinceLastCall=false] - Return a unified diff
 *   against the previous snapshot (or a status message) instead of the HTML.
 *   Takes precedence over `search`.
 * @param {boolean} [options.includeStyles=false] - Forwarded as `keepStyles`.
 * @param {number} [options.maxAttrLen=200] - Forwarded to formatHtmlForPrompt.
 * @param {number} [options.maxContentLen=500] - Forwarded to formatHtmlForPrompt.
 * @returns {Promise<string>} Cleaned HTML, search excerpt, diff, or status message.
 */
export async function getCleanHTML(options) {
    const {
        locator,
        search,
        showDiffSinceLastCall = false,
        includeStyles = false,
        maxAttrLen = 200,
        maxContentLen = 500,
    } = options;
    const MAX_MATCHES = 10;
    const CONTEXT_LINES = 5;

    // Get raw HTML and the owning page (used as the weak key for snapshots)
    const targetIsPage = isPage(locator);
    const page = targetIsPage ? locator : locator.page();
    const rawHtml = targetIsPage ? await locator.content() : await locator.innerHTML();

    const cleanedHtml = await formatHtmlForPrompt({
        html: rawHtml,
        keepStyles: includeStyles,
        maxAttrLen,
        maxContentLen,
    });
    // Sanitize to remove unpaired surrogates that break JSON encoding
    const htmlStr = cleanedHtml.toWellFormed?.() ?? cleanedHtml;

    // Look up the previous snapshot, then record the current one for future diffs
    let pageSnapshots = lastHtmlSnapshots.get(page);
    if (!pageSnapshots) {
        pageSnapshots = new Map();
        lastHtmlSnapshots.set(page, pageSnapshots);
    }
    const snapshotKey = getSnapshotKey(locator);
    const previousSnapshot = pageSnapshots.get(snapshotKey);
    pageSnapshots.set(snapshotKey, htmlStr);

    if (showDiffSinceLastCall) {
        // `undefined` (not falsiness) means "never stored": an empty-string
        // snapshot is still a valid previous state to diff against.
        if (previousSnapshot === undefined) {
            return 'No previous snapshot available. This is the first call for this locator. Full snapshot stored for next diff.';
        }
        // Compare the snapshots directly rather than inferring "no changes"
        // from the number of lines in the generated patch.
        if (previousSnapshot === htmlStr) {
            return 'No changes detected since last snapshot';
        }
        return createPatch('html', previousSnapshot, htmlStr, 'previous', 'current', {
            context: 3,
        });
    }

    if (!search) {
        return htmlStr;
    }

    const lines = htmlStr.split('\n');
    const matchesLine = isRegExp(search)
        ? (line) => {
            // Global/sticky regexes carry `lastIndex` across test() calls, which
            // would make matching depend on the previous line; start fresh each time.
            search.lastIndex = 0;
            return search.test(line);
        }
        : (line) => line.includes(search);

    const matchIndices = [];
    for (let i = 0; i < lines.length && matchIndices.length < MAX_MATCHES; i++) {
        if (matchesLine(lines[i])) {
            matchIndices.push(i);
        }
    }
    if (matchIndices.length === 0) {
        return 'No matches found';
    }

    // matchIndices is ascending, so context windows can be merged in one pass:
    // each window starts no earlier than the line after the last emitted one.
    const result = [];
    let lastIncluded = -1;
    for (const idx of matchIndices) {
        const start = Math.max(0, idx - CONTEXT_LINES, lastIncluded + 1);
        const end = Math.min(lines.length - 1, idx + CONTEXT_LINES);
        if (result.length > 0 && start > lastIncluded + 1) {
            // Gap between this window and the previous one
            result.push('---');
        }
        for (let i = start; i <= end; i++) {
            result.push(lines[i]);
        }
        lastIncluded = Math.max(lastIncluded, end);
    }
    return result.join('\n');
}
|
|
115
|
+
//# sourceMappingURL=clean-html.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"clean-html.js","sourceRoot":"","sources":["../src/clean-html.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,MAAM,CAAA;AAClC,OAAO,EAAE,mBAAmB,EAAE,MAAM,kBAAkB,CAAA;AAWtD,yDAAyD;AACzD,MAAM,iBAAiB,GAAuC,IAAI,OAAO,EAAE,CAAA;AAE3E,SAAS,MAAM,CAAC,GAAQ;IACtB,OAAO,GAAG,IAAI,OAAO,GAAG,CAAC,OAAO,KAAK,UAAU,IAAI,OAAO,GAAG,CAAC,IAAI,KAAK,UAAU,CAAA;AACnF,CAAC;AAED,SAAS,QAAQ,CAAC,KAAU;IAC1B,OAAO,CACL,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,CAAC,IAAI,KAAK,UAAU,IAAI,OAAO,KAAK,CAAC,IAAI,KAAK,UAAU,CACpH,CAAA;AACH,CAAC;AAED,SAAS,cAAc,CAAC,OAAuB;IAC7C,IAAI,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC;QACpB,OAAO,UAAU,CAAA;IACnB,CAAC;IACD,4CAA4C;IAC5C,OAAQ,OAAe,CAAC,SAAS,IAAI,aAAa,CAAA;AACpD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,OAA4B;IAC7D,MAAM,EACJ,OAAO,EACP,MAAM,EACN,qBAAqB,GAAG,KAAK,EAC7B,aAAa,GAAG,KAAK,EACrB,UAAU,GAAG,GAAG,EAChB,aAAa,GAAG,GAAG,GACpB,GAAG,OAAO,CAAA;IAEX,eAAe;IACf,IAAI,OAAe,CAAA;IACnB,IAAI,IAAU,CAAA;IAEd,IAAI,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC;QACpB,IAAI,GAAG,OAAO,CAAA;QACd,OAAO,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAA;IACnC,CAAC;SAAM,CAAC;QACN,IAAI,GAAG,OAAO,CAAC,IAAI,EAAE,CAAA;QACrB,OAAO,GAAG,MAAM,OAAO,CAAC,SAAS,EAAE,CAAA;IACrC,CAAC;IAED,2CAA2C;IAC3C,MAAM,WAAW,GAAG,MAAM,mBAAmB,CAAC;QAC5C,IAAI,EAAE,OAAO;QACb,UAAU,EAAE,aAAa;QACzB,UAAU;QACV,aAAa;KACd,CAAC,CAAA;IAEF,kEAAkE;IAClE,IAAI,OAAO,GAAG,WAAW,CAAC,YAAY,EAAE,EAAE,IAAI,WAAW,CAAA;IAEzD,iBAAiB;IACjB,IAAI,qBAAqB,EAAE,CAAC;QAC1B,IAAI,aAAa,GAAG,iBAAiB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QAC/C,IAAI,CAAC,aAAa,EAAE,CAAC;YACnB,aAAa,GAAG,IAAI,GAAG,EAAE,CAAA;YACzB,iBAAiB,CAAC,GAAG,CAAC,IAAI,EAAE,aAAa,CAAC,CAAA;QAC5C,CAAC;QAED,MAAM,WAAW,GAAG,cAAc,CAAC,OAAO,CAAC,CAAA;QAC3C,MAAM,gBAAgB,GAAG,aAAa,CAAC,GAAG,CAAC,WAAW,CAAC,CAAA;QAEvD,IAAI,CAAC,gBAAgB,EAAE,CAAC;YACtB,aAAa,CAAC,GAAG,CAAC,WAAW,EAAE,OAAO,CAAC,CAAA;YACvC,OAAO,8GAA8G,CAAA;QACvH,CAAC;QAED,MAAM,KAAK,GAAG,WAAW,CAAC,MAAM,EAAE,gBAAgB,EAAE,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE;YAClF,OAAO,EAAE,CAAC;SACX,CAAC,CAAA;QAEF,aAAa,CAAC,GAAG,CAAC,WAAW,EAAE,OAAO,CAAC,CAAA;QAEvC,IA
AI,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YAClC,OAAO,yCAAyC,CAAA;QAClD,CAAC;QACD,OAAO,KAAK,CAAA;IACd,CAAC;IAED,kCAAkC;IAClC,IAAI,aAAa,GAAG,iBAAiB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;IAC/C,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,aAAa,GAAG,IAAI,GAAG,EAAE,CAAA;QACzB,iBAAiB,CAAC,GAAG,CAAC,IAAI,EAAE,aAAa,CAAC,CAAA;IAC5C,CAAC;IACD,aAAa,CAAC,GAAG,CAAC,cAAc,CAAC,OAAO,CAAC,EAAE,OAAO,CAAC,CAAA;IAEnD,gBAAgB;IAChB,IAAI,MAAM,EAAE,CAAC;QACX,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QACjC,MAAM,YAAY,GAAa,EAAE,CAAA;QAEjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAA;YACrB,IAAI,OAAO,GAAG,KAAK,CAAA;YACnB,IAAI,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;gBACrB,OAAO,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAC7B,CAAC;iBAAM,CAAC;gBACN,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAA;YACjC,CAAC;YAED,IAAI,OAAO,EAAE,CAAC;gBACZ,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;gBACpB,IAAI,YAAY,CAAC,MAAM,IAAI,EAAE;oBAAE,MAAK;YACtC,CAAC;QACH,CAAC;QAED,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC9B,OAAO,kBAAkB,CAAA;QAC3B,CAAC;QAED,mEAAmE;QACnE,MAAM,aAAa,GAAG,CAAC,CAAA;QACvB,MAAM,aAAa,GAAG,IAAI,GAAG,EAAU,CAAA;QACvC,KAAK,MAAM,GAAG,IAAI,YAAY,EAAE,CAAC;YAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,GAAG,aAAa,CAAC,CAAA;YAC9C,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,GAAG,GAAG,aAAa,CAAC,CAAA;YAC3D,KAAK,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC,IAAI,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;gBAClC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;YACtB,CAAC;QACH,CAAC;QAED,+DAA+D;QAC/D,MAAM,aAAa,GAAG,CAAC,GAAG,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;QAC9D,MAAM,MAAM,GAAa,EAAE,CAAA;QAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,aAAa,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9C,MAAM,OAAO,GAAG,aAAa,CAAC,CAAC,CAAC,CAAA;YAChC,IAAI,CAAC,GAAG,CAAC,IAAI,aAAa,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,OAAO,GAAG,CAAC,EAAE,CAAC;gBAClD,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;YACpB,CAAC;YACD,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAA;QA
C7B,CAAC;QAED,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAC1B,CAAC;IAED,OAAO,OAAO,CAAA;AAChB,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
 * Options accepted by `formatHtmlForPrompt`.
 */
export interface FormatHtmlOptions {
    /** Raw HTML to clean and pretty-print. */
    html: string;
    /** When true, `style` and `class` attributes are kept. */
    keepStyles?: boolean;
    /** Maximum length of an attribute value before it is truncated. */
    maxAttrLen?: number;
    /** Maximum length of a text node before it is truncated. */
    maxContentLen?: number;
}
|
|
7
|
+
/**
 * Cleans HTML for use in a prompt (removes unwanted tags and attributes,
 * truncates long values, drops empty elements and redundant wrappers) and
 * resolves to the beautified HTML string.
 */
export declare function formatHtmlForPrompt({ html, keepStyles, maxAttrLen, maxContentLen, }: FormatHtmlOptions): Promise<string>;
|
|
8
|
+
//# sourceMappingURL=htmlrewrite.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"htmlrewrite.d.ts","sourceRoot":"","sources":["../src/htmlrewrite.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,iBAAiB;IAC9B,IAAI,EAAE,MAAM,CAAA;IACZ,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,aAAa,CAAC,EAAE,MAAM,CAAA;CACzB;AAED,wBAAsB,mBAAmB,CAAC,EACtC,IAAI,EACJ,UAAkB,EAClB,UAAgB,EAChB,aAAmB,GACtB,EAAE,iBAAiB,mBA+OnB"}
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
import posthtml from 'posthtml';
|
|
2
|
+
import beautify from 'posthtml-beautify';
|
|
3
|
+
/**
 * Reduces raw HTML to a compact, indented form suitable for a prompt.
 *
 * Processing steps, in order:
 *  1. remove unwanted tags and HTML comments, keep only allow-listed and
 *     `data-*` attributes, truncate long attribute values and text nodes;
 *  2. remove elements that have neither attributes nor non-whitespace content;
 *  3. replace attribute-less elements whose only non-whitespace child is an
 *     element with that child;
 *  4. beautify with a 1-space indent and no line wrapping.
 *
 * @param {object} options
 * @param {string} options.html - Raw HTML to process.
 * @param {boolean} [options.keepStyles=false] - Also keep `style` and `class` attributes.
 * @param {number} [options.maxAttrLen=200] - Attribute values longer than this are truncated.
 * @param {number} [options.maxContentLen=500] - Text nodes longer than this are truncated.
 * @returns {Promise<string>} The processed HTML.
 */
export async function formatHtmlForPrompt({ html, keepStyles = false, maxAttrLen = 200, maxContentLen = 500, }) {
    const tagsToRemove = new Set([
        'hint',
        'style',
        'link',
        'script',
        'meta',
        'noscript',
        'svg',
        'head',
    ]);
    const attributesToKeep = new Set([
        // Standard descriptive attributes
        'label',
        'title',
        'alt',
        'href',
        'name',
        'value',
        'checked',
        'placeholder',
        'type',
        'role',
        'target',
        // Descriptive aria attributes (text content)
        'aria-label',
        'aria-placeholder',
        'aria-valuetext',
        'aria-roledescription',
        // Useful aria state attributes
        'aria-hidden',
        'aria-expanded',
        'aria-checked',
        'aria-selected',
        'aria-disabled',
        'aria-pressed',
        'aria-required',
        'aria-current',
        // Test IDs (data-testid, data-test, data-cy are covered by data-* prefix)
        'testid',
        'test-id',
        'vimium-label',
        // Conditionally added: 'style', 'class'
    ]);
    if (keepStyles) {
        attributesToKeep.add('style');
        attributesToKeep.add('class');
    }

    // Cuts `str` to `maxLen` characters and appends how many were dropped.
    const truncate = (str, maxLen) => {
        if (str.length <= maxLen)
            return str;
        const remaining = str.length - maxLen;
        return str.slice(0, maxLen) + `...${remaining} more characters`;
    };

    // A string node that is an HTML comment (`<!-- ... -->`).
    const isComment = (node) => {
        if (typeof node !== 'string')
            return false;
        const trimmed = node.trim();
        return trimmed.startsWith('<!--') && trimmed.endsWith('-->');
    };

    // Applies `pass` once, then keeps re-applying it until the JSON
    // serialization of the result stops changing.
    const repeatUntilStable = (initial, pass) => {
        let current = pass(initial);
        let previousJson = '';
        let currentJson = JSON.stringify(current);
        while (previousJson !== currentJson) {
            previousJson = currentJson;
            current = pass(current);
            currentJson = JSON.stringify(current);
        }
        return current;
    };

    // Plugin: remove unwanted tags and comments, filter attributes, truncate text
    const removeTagsAndAttrsPlugin = () => {
        const processNode = (node) => {
            if (typeof node === 'string') {
                // Drop comments BEFORE truncating: truncation would cut off the
                // closing `-->`, leaving an unterminated comment in the output.
                if (isComment(node))
                    return null;
                if (node.trim().length === 0)
                    return node;
                return truncate(node, maxContentLen);
            }
            // Remove unwanted tags
            if (node.tag && tagsToRemove.has(node.tag.toLowerCase())) {
                return null;
            }
            // Keep only allow-listed and data-* attributes, truncating string values
            if (node.attrs) {
                const keptAttrs = {};
                for (const [attr, value] of Object.entries(node.attrs)) {
                    if (attr.startsWith('data-') || attributesToKeep.has(attr)) {
                        keptAttrs[attr] = typeof value === 'string'
                            ? truncate(value, maxAttrLen)
                            : value;
                    }
                }
                node.attrs = keptAttrs;
            }
            // Process children recursively; removed children come back as null
            if (Array.isArray(node.content)) {
                node.content = node.content
                    .map(processNode)
                    .filter((item) => item !== null);
            }
            return node;
        };
        return (tree) => tree.map(processNode).filter((item) => item !== null);
    };

    // Plugin: replace an attribute-less element whose only non-whitespace child
    // is another element with that child.
    const unwrapNestedWrappersPlugin = () => {
        const isWhitespaceOnly = (node) => typeof node === 'string' && node.trim().length === 0;
        const hasNoAttrs = (node) => !node.attrs || Object.keys(node.attrs).length === 0;
        const unwrapNode = (node) => {
            if (typeof node === 'string')
                return node;
            if (!node.tag)
                return node;
            // Children first, so nested wrappers collapse bottom-up
            if (Array.isArray(node.content)) {
                node.content = node.content.map(unwrapNode);
            }
            if (hasNoAttrs(node) && Array.isArray(node.content)) {
                const meaningfulChildren = node.content.filter((child) => !isWhitespaceOnly(child));
                if (meaningfulChildren.length === 1) {
                    const onlyChild = meaningfulChildren[0];
                    // Only unwrap around an element, never around text
                    if (typeof onlyChild !== 'string' && onlyChild.tag) {
                        return onlyChild;
                    }
                }
            }
            return node;
        };
        return (tree) => repeatUntilStable(tree, (nodes) => nodes.map(unwrapNode));
    };

    // Plugin: remove elements with no attributes and no non-whitespace content
    const removeEmptyElementsPlugin = () => {
        const isEmptyElement = (node) => {
            if (typeof node === 'string')
                return false;
            if (!node.tag)
                return false;
            const hasAttrs = node.attrs && Object.keys(node.attrs).length > 0;
            // Non-array content cannot be inspected child by child; treat any
            // truthy value as content rather than calling `.some` on it.
            const hasContent = Array.isArray(node.content)
                ? node.content.some((child) => typeof child === 'string' ? child.trim().length > 0 : true)
                : Boolean(node.content);
            return !hasAttrs && !hasContent;
        };
        const removeEmpty = (content) => {
            if (!content || !Array.isArray(content))
                return content;
            return content
                .map((node) => {
                    if (typeof node === 'string')
                        return node;
                    if (node.content) {
                        node.content = removeEmpty(node.content);
                    }
                    return node;
                })
                .filter((node) => !isEmptyElement(node));
        };
        return (tree) => repeatUntilStable(tree, removeEmpty);
    };

    // Process HTML
    const processor = posthtml()
        .use(removeTagsAndAttrsPlugin())
        .use(removeEmptyElementsPlugin())
        .use(unwrapNestedWrappersPlugin())
        .use(beautify({
            rules: {
                indent: 1, // 1-space indent
                blankLines: false, // no extra blank lines
                maxlen: 100000 // effectively never wrap by content length
            },
            jsBeautifyOptions: {
                wrap_line_length: 0, // disable js-beautify wrapping
                preserve_newlines: false // reduce stray newlines
            }
        }));
    const result = await processor.process(html);
    return result.html;
}
|
|
221
|
+
//# sourceMappingURL=htmlrewrite.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"htmlrewrite.js","sourceRoot":"","sources":["../src/htmlrewrite.ts"],"names":[],"mappings":"AAAA,OAAO,QAAQ,MAAM,UAAU,CAAA;AAC/B,OAAO,QAAQ,MAAM,mBAAmB,CAAA;AASxC,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,EACtC,IAAI,EACJ,UAAU,GAAG,KAAK,EAClB,UAAU,GAAG,GAAG,EAChB,aAAa,GAAG,GAAG,GACH;IAChB,MAAM,YAAY,GAAG;QACjB,MAAM;QACN,OAAO;QACP,MAAM;QACN,QAAQ;QACR,MAAM;QACN,UAAU;QACV,KAAK;QACL,MAAM;KACT,CAAA;IAED,MAAM,gBAAgB,GAAG;QACrB,kCAAkC;QAClC,OAAO;QACP,OAAO;QACP,KAAK;QACL,MAAM;QACN,MAAM;QACN,OAAO;QACP,SAAS;QACT,aAAa;QACb,MAAM;QACN,MAAM;QACN,QAAQ;QACR,6CAA6C;QAC7C,YAAY;QACZ,kBAAkB;QAClB,gBAAgB;QAChB,sBAAsB;QACtB,+BAA+B;QAC/B,aAAa;QACb,eAAe;QACf,cAAc;QACd,eAAe;QACf,eAAe;QACf,cAAc;QACd,eAAe;QACf,cAAc;QACd,0EAA0E;QAC1E,QAAQ;QACR,SAAS;QACT,cAAc;QACd,wCAAwC;KAC3C,CAAA;IAED,IAAI,UAAU,EAAE,CAAC;QACb,gBAAgB,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,CAAA;IAC3C,CAAC;IAED,MAAM,QAAQ,GAAG,CAAC,GAAW,EAAE,MAAc,EAAU,EAAE;QACrD,IAAI,GAAG,CAAC,MAAM,IAAI,MAAM;YAAE,OAAO,GAAG,CAAA;QACpC,MAAM,SAAS,GAAG,GAAG,CAAC,MAAM,GAAG,MAAM,CAAA;QACrC,OAAO,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,GAAG,MAAM,SAAS,kBAAkB,CAAA;IACnE,CAAC,CAAA;IAED,8DAA8D;IAC9D,MAAM,wBAAwB,GAAG,GAAG,EAAE;QAClC,OAAO,CAAC,IAAI,EAAE,EAAE;YACZ,gCAAgC;YAChC,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;gBACxB,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;oBAC3B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;oBAC3B,OAAO,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAA;gBACnE,CAAC;gBACD,OAAO,IAAI,CAAA;YACf,CAAC,CAAC,CAAA;YAEF,gCAAgC;YAChC,MAAM,WAAW,GAAG,CAAC,IAAI,EAAE,EAAE;gBACzB,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;oBAC3B,wBAAwB;oBACxB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;oBAC3B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;wBAAE,OAAO,IAAI,CAAA;oBACrC,OAAO,QAAQ,CAAC,IAAI,EAAE,aAAa,CAAC,CAAA;gBACxC,CAAC;gBAED,uBAAuB;gBACvB,IAAI,IAAI,CAAC,GAAG,IAAI,YAAY,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;oBAC5D,OAAO,IAAI,CAAA;gBACf,CAAC;gBAED,oBAAoB;gBACpB,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;oBACb,MAAM,QAAQ,GAAsB,EAAE,CAAA;oBACtC,KAA
K,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;wBACrD,MAAM,UAAU,GACZ,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC;4BACxB,gBAAgB,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAA;wBAEnC,IAAI,UAAU,EAAE,CAAC;4BACb,4BAA4B;4BAC5B,QAAQ,CAAC,IAAI,CAAC,GAAG,OAAO,KAAK,KAAK,QAAQ;gCACtC,CAAC,CAAC,QAAQ,CAAC,KAAK,EAAE,UAAU,CAAC;gCAC7B,CAAC,CAAC,KAAK,CAAA;wBACf,CAAC;oBACL,CAAC;oBACD,IAAI,CAAC,KAAK,GAAG,QAAQ,CAAA;gBACzB,CAAC;gBAED,8BAA8B;gBAC9B,IAAI,IAAI,CAAC,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;oBAC9C,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO;yBACtB,GAAG,CAAC,WAAW,CAAC;yBAChB,MAAM,CAAC,IAAI,CAAC,EAAE;wBACX,IAAI,IAAI,KAAK,IAAI;4BAAE,OAAO,KAAK,CAAA;wBAC/B,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;4BAC3B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;4BAC3B,OAAO,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAA;wBACnE,CAAC;wBACD,OAAO,IAAI,CAAA;oBACf,CAAC,CAAC,CAAA;gBACV,CAAC;gBAED,OAAO,IAAI,CAAA;YACf,CAAC,CAAA;YAED,yBAAyB;YACzB,OAAO,IAAI,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,KAAK,IAAI,CAAC,CAAA;QAC9D,CAAC,CAAA;IACL,CAAC,CAAA;IAED,uDAAuD;IACvD,+EAA+E;IAC/E,MAAM,0BAA0B,GAAG,GAAG,EAAE;QACpC,OAAO,CAAC,IAAI,EAAE,EAAE;YACZ,MAAM,gBAAgB,GAAG,CAAC,IAAI,EAAE,EAAE;gBAC9B,OAAO,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,CAAA;YAC/D,CAAC,CAAA;YAED,MAAM,UAAU,GAAG,CAAC,IAAI,EAAE,EAAE;gBACxB,OAAO,CAAC,IAAI,CAAC,KAAK,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,KAAK,CAAC,CAAA;YAC9D,CAAC,CAAA;YAED,MAAM,UAAU,GAAG,CAAC,IAAI,EAAE,EAAE;gBACxB,IAAI,OAAO,IAAI,KAAK,QAAQ;oBAAE,OAAO,IAAI,CAAA;gBACzC,IAAI,CAAC,IAAI,CAAC,GAAG;oBAAE,OAAO,IAAI,CAAA;gBAE1B,sCAAsC;gBACtC,IAAI,IAAI,CAAC,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;oBAC9C,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAA;gBAC/C,CAAC;gBAED,gDAAgD;gBAChD,sBAAsB;gBACtB,4DAA4D;gBAC5D,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;oBAClE,MAAM,qBAAqB,GAAG,IAAI,CAAC,OAAO,CAAC,MA
AM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,CAAA;oBAE5E,IAAI,qBAAqB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;wBACrC,MAAM,SAAS,GAAG,qBAAqB,CAAC,CAAC,CAAC,CAAA;wBAC1C,0DAA0D;wBAC1D,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,CAAC,GAAG,EAAE,CAAC;4BACjD,mCAAmC;4BACnC,OAAO,SAAS,CAAA;wBACpB,CAAC;oBACL,CAAC;gBACL,CAAC;gBAED,OAAO,IAAI,CAAA;YACf,CAAC,CAAA;YAED,sEAAsE;YACtE,IAAI,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAA;YACjC,IAAI,QAAQ,GAAG,EAAE,CAAA;YACjB,IAAI,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;YACrC,OAAO,QAAQ,KAAK,QAAQ,EAAE,CAAC;gBAC3B,QAAQ,GAAG,QAAQ,CAAA;gBACnB,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC,CAAA;gBAC/B,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;YACrC,CAAC;YAED,OAAO,MAAM,CAAA;QACjB,CAAC,CAAA;IACL,CAAC,CAAA;IAED,yDAAyD;IACzD,qDAAqD;IACrD,MAAM,yBAAyB,GAAG,GAAG,EAAE;QACnC,OAAO,CAAC,IAAI,EAAE,EAAE;YACZ,MAAM,cAAc,GAAG,CAAC,IAAI,EAAE,EAAE;gBAC5B,IAAI,OAAO,IAAI,KAAK,QAAQ;oBAAE,OAAO,KAAK,CAAA;gBAC1C,IAAI,CAAC,IAAI,CAAC,GAAG;oBAAE,OAAO,KAAK,CAAA;gBAC3B,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,GAAG,CAAC,CAAA;gBACjE,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CACrD,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CACrD,CAAA;gBACD,OAAO,CAAC,QAAQ,IAAI,CAAC,UAAU,CAAA;YACnC,CAAC,CAAA;YAED,MAAM,WAAW,GAAG,CAAC,OAAO,EAAE,EAAE;gBAC5B,IAAI,CAAC,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC;oBAAE,OAAO,OAAO,CAAA;gBAEvD,OAAO,OAAO;qBACT,GAAG,CAAC,IAAI,CAAC,EAAE;oBACR,IAAI,OAAO,IAAI,KAAK,QAAQ;wBAAE,OAAO,IAAI,CAAA;oBACzC,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;wBACf,IAAI,CAAC,OAAO,GAAG,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;oBAC5C,CAAC;oBACD,OAAO,IAAI,CAAA;gBACf,CAAC,CAAC;qBACD,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,CAAA;YAC9C,CAAC,CAAA;YAED,qCAAqC;YACrC,IAAI,MAAM,GAAG,WAAW,CAAC,IAAI,CAAC,CAAA;YAC9B,IAAI,QAAQ,GAAG,EAAE,CAAA;YACjB,IAAI,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;YACrC,OAAO,QAAQ,KAAK,QAAQ,EAAE,CAAC;gBAC3B,QAAQ,GAAG,QAAQ,CAAA;gBACnB,M
AAM,GAAG,WAAW,CAAC,MAAM,CAAC,CAAA;gBAC5B,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;YACrC,CAAC;YAED,OAAO,MAAM,CAAA;QACjB,CAAC,CAAA;IACL,CAAC,CAAA;IAED,eAAe;IACf,MAAM,SAAS,GAAG,QAAQ,EAAE;SACvB,GAAG,CAAC,wBAAwB,EAAE,CAAC;SAC/B,GAAG,CAAC,yBAAyB,EAAE,CAAC;SAChC,GAAG,CAAC,0BAA0B,EAAE,CAAC;SACjC,GAAG,CAAC,QAAQ,CAAC;QACV,KAAK,EAAE;YACH,MAAM,EAAE,CAAC,EAAW,iBAAiB;YACrC,UAAU,EAAE,KAAK,EAAG,uBAAuB;YAC3C,MAAM,EAAE,MAAM,CAAM,2CAA2C;SAClE;QACD,iBAAiB,EAAE;YACf,gBAAgB,EAAE,CAAC,EAAM,+BAA+B;YACxD,iBAAiB,EAAE,KAAK,CAAC,wBAAwB;SACpD;KACJ,CAAC,CAAC,CAAA;IAEP,qBAAqB;IACrB,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;IAE5C,OAAO,MAAM,CAAC,IAAI,CAAA;AACtB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"htmlrewrite.test.d.ts","sourceRoot":"","sources":["../src/htmlrewrite.test.ts"],"names":[],"mappings":""}
|