@tkeron/html-parser 0.1.7 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -7
- package/bun.lock +5 -0
- package/index.ts +4 -0
- package/package.json +7 -1
- package/src/css-selector.ts +1 -1
- package/src/dom-simulator.ts +41 -17
- package/src/encoding.ts +39 -0
- package/src/index.ts +9 -0
- package/src/parser.ts +509 -143
- package/src/serializer.ts +450 -0
- package/src/tokenizer.ts +190 -118
- package/tests/advanced.test.ts +121 -108
- package/tests/custom-elements-head.test.ts +105 -0
- package/tests/dom-extended.test.ts +12 -12
- package/tests/dom-manipulation.test.ts +9 -10
- package/tests/dom.test.ts +32 -27
- package/tests/helpers/tokenizer-adapter.test.ts +70 -0
- package/tests/helpers/tokenizer-adapter.ts +65 -0
- package/tests/helpers/tree-adapter.test.ts +39 -0
- package/tests/helpers/tree-adapter.ts +60 -0
- package/tests/html5lib-data/tokenizer/namedEntities.test +42422 -0
- package/tests/html5lib-data/tokenizer/pendingSpecChanges.test +9 -0
- package/tests/html5lib-data/tree-construction/adoption01.dat +354 -0
- package/tests/html5lib-data/tree-construction/adoption02.dat +39 -0
- package/tests/html5lib-data/tree-construction/domjs-unsafe.dat +0 -0
- package/tests/html5lib-data/tree-construction/entities02.dat +309 -0
- package/tests/html5lib-data/tree-construction/html5test-com.dat +301 -0
- package/tests/html5lib-data/tree-construction/math.dat +104 -0
- package/tests/html5lib-data/tree-construction/namespace-sensitivity.dat +22 -0
- package/tests/html5lib-data/tree-construction/noscript01.dat +237 -0
- package/tests/html5lib-data/tree-construction/ruby.dat +302 -0
- package/tests/html5lib-data/tree-construction/scriptdata01.dat +372 -0
- package/tests/html5lib-data/tree-construction/svg.dat +104 -0
- package/tests/html5lib-data/tree-construction/template.dat +1673 -0
- package/tests/html5lib-data/tree-construction/tests10.dat +853 -0
- package/tests/html5lib-data/tree-construction/tests11.dat +523 -0
- package/tests/html5lib-data/tree-construction/tests20.dat +842 -0
- package/tests/html5lib-data/tree-construction/tests21.dat +306 -0
- package/tests/html5lib-data/tree-construction/tests23.dat +168 -0
- package/tests/html5lib-data/tree-construction/tests24.dat +79 -0
- package/tests/html5lib-data/tree-construction/tests5.dat +210 -0
- package/tests/html5lib-data/tree-construction/tests6.dat +663 -0
- package/tests/html5lib-data/tree-construction/tests_innerHTML_1.dat +844 -0
- package/tests/parser.test.ts +173 -193
- package/tests/serializer-core.test.ts +16 -0
- package/tests/serializer-data/core.test +125 -0
- package/tests/serializer-data/injectmeta.test +66 -0
- package/tests/serializer-data/optionaltags.test +965 -0
- package/tests/serializer-data/options.test +60 -0
- package/tests/serializer-data/whitespace.test +51 -0
- package/tests/serializer-injectmeta.test.ts +16 -0
- package/tests/serializer-optionaltags.test.ts +16 -0
- package/tests/serializer-options.test.ts +16 -0
- package/tests/serializer-whitespace.test.ts +16 -0
- package/tests/tokenizer-namedEntities.test.ts +20 -0
- package/tests/tokenizer-pendingSpecChanges.test.ts +20 -0
- package/tests/tokenizer.test.ts +25 -32
- package/tests/tree-construction-adoption01.test.ts +37 -0
- package/tests/tree-construction-adoption02.test.ts +34 -0
- package/tests/tree-construction-domjs-unsafe.test.ts +24 -0
- package/tests/tree-construction-entities02.test.ts +33 -0
- package/tests/tree-construction-html5test-com.test.ts +32 -0
- package/tests/tree-construction-math.test.ts +18 -0
- package/tests/tree-construction-namespace-sensitivity.test.ts +18 -0
- package/tests/tree-construction-noscript01.test.ts +18 -0
- package/tests/tree-construction-ruby.test.ts +21 -0
- package/tests/tree-construction-scriptdata01.test.ts +21 -0
- package/tests/tree-construction-svg.test.ts +21 -0
- package/tests/tree-construction-template.test.ts +21 -0
- package/tests/tree-construction-tests10.test.ts +21 -0
- package/tests/tree-construction-tests11.test.ts +21 -0
- package/tests/tree-construction-tests20.test.ts +18 -0
- package/tests/tree-construction-tests21.test.ts +18 -0
- package/tests/tree-construction-tests23.test.ts +18 -0
- package/tests/tree-construction-tests24.test.ts +18 -0
- package/tests/tree-construction-tests5.test.ts +21 -0
- package/tests/tree-construction-tests6.test.ts +21 -0
- package/tests/tree-construction-tests_innerHTML_1.test.ts +21 -0
- package/tests/custom-elements.test.ts +0 -745
- package/tests/official/README.md +0 -87
- package/tests/official/acid/acid-tests.test.ts +0 -309
- package/tests/official/final-output/final-output.test.ts +0 -361
- package/tests/official/html5lib/tokenizer-utils.ts +0 -192
- package/tests/official/html5lib/tokenizer.test.ts +0 -171
- package/tests/official/html5lib/tree-construction-utils.ts +0 -194
- package/tests/official/html5lib/tree-construction.test.ts +0 -250
- package/tests/official/validator/validator-tests.test.ts +0 -237
- package/tests/official/validator-nu/validator-nu.test.ts +0 -335
- package/tests/official/whatwg/whatwg-tests.test.ts +0 -205
- package/tests/official/wpt/wpt-tests.test.ts +0 -409
|
@@ -0,0 +1,450 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Serializes a list of HTML5 tokens to an HTML string.
|
|
3
|
+
* Based on HTML5 serialization algorithm.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Escapes text-node content for inclusion in HTML markup.
 *
 * `&` is replaced first so that the entities produced for `<` and `>` are not
 * escaped a second time.
 *
 * @param text Raw character data.
 * @returns The text with `&`, `<` and `>` replaced by `&amp;`, `&lt;` and `&gt;`.
 */
function escapeText(text: string): string {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}
|
|
9
|
+
|
|
10
|
+
/**
 * Escapes an attribute value so it is safe inside either a single- or a
 * double-quoted attribute.
 *
 * `&` is replaced first so that the entities produced for the quote
 * characters are not escaped a second time.
 *
 * @param value Raw attribute value.
 * @returns The value with `&`, `"` and `'` replaced by `&amp;`, `&quot;` and `&#39;`.
 */
function escapeAttributeValue(value: string): string {
  return value
    .replace(/&/g, '&amp;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}
|
|
13
|
+
|
|
14
|
+
/**
 * Tells whether an attribute value has to be wrapped in quotes.
 *
 * @param value Raw attribute value.
 * @returns `true` for the empty string and for values containing whitespace
 *          (tab, LF, CR, FF, space), a quote character, `=`, a backtick or `>`.
 */
function needsQuotes(value: string): boolean {
  return value === '' || /[\t\n\r\f "'=`>]/.test(value);
}

/**
 * Serializes one attribute as `name`, `name=value` or `name=<quote>value<quote>`.
 *
 * - When `minimize_boolean_attributes` is not explicitly `false` and the value
 *   equals the name, only the name is emitted.
 * - `&` is always escaped (also in unquoted values) so the emitted text cannot
 *   be re-read as a character reference; `<` is escaped when
 *   `escape_lt_in_attrs` is set.
 * - The value is quoted when `needsQuotes` says so or `quote_attr_values` is set.
 * - With `quote_char` the given quote is forced and escaped inside the value
 *   (`'` becomes `&#39;`, any other quote char escapes `"` as `&quot;`).
 * - Without `quote_char`, single quotes are chosen only when the value contains
 *   `"` but no `'`; otherwise double quotes are used and `"` is escaped.
 *
 * @param name Attribute name, emitted verbatim.
 * @param value Raw (unescaped) attribute value.
 * @param options Serializer options; only the attribute-related ones are read.
 * @returns The attribute markup without a leading space.
 */
function serializeAttribute(name: string, value: string, options?: { quote_char?: string; quote_attr_values?: boolean; minimize_boolean_attributes?: boolean; escape_lt_in_attrs?: boolean; escape_rcdata?: boolean }): string {
  if (options?.minimize_boolean_attributes !== false && value === name) {
    return name;
  }

  // Escape before deciding on quoting so the unquoted form is escaped as well.
  let escaped = value.replace(/&/g, '&amp;');
  if (options?.escape_lt_in_attrs) {
    escaped = escaped.replace(/</g, '&lt;');
  }

  const mustQuote = needsQuotes(value) || options?.quote_attr_values;
  if (!mustQuote) {
    return `${name}=${escaped}`;
  }

  const forcedQuote = options?.quote_char;
  if (forcedQuote) {
    escaped = forcedQuote === "'"
      ? escaped.replace(/'/g, '&#39;')
      : escaped.replace(/"/g, '&quot;');
    return `${name}=${forcedQuote}${escaped}${forcedQuote}`;
  }

  // Automatic quote selection: prefer double quotes, fall back to single quotes
  // when that avoids escaping (value has `"` but no `'`).
  if (value.includes('"') && !value.includes("'")) {
    return `${name}='${escaped}'`;
  }
  return `${name}="${escaped.replace(/"/g, '&quot;')}"`;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Serializes a whole attribute collection.
 *
 * Accepts either an array of `{ name, value }` records or a plain
 * `name -> value` object; `null`/`undefined` yields an empty string.
 * Attributes are emitted sorted by name (using `localeCompare`) and each one
 * is prefixed with a single space.
 *
 * @param attrs Attribute collection in either supported shape.
 * @param options Serializer options, forwarded to `serializeAttribute`.
 * @returns The concatenated attribute markup, e.g. ` a=1 b="x y"`.
 */
function serializeAttributes(attrs: any, options?: { quote_char?: string; quote_attr_values?: boolean; minimize_boolean_attributes?: boolean; escape_lt_in_attrs?: boolean; use_trailing_solidus?: boolean; escape_rcdata?: boolean }): string {
  let pairs: [string, string][];
  if (Array.isArray(attrs)) {
    pairs = attrs.map((attr: any): [string, string] => [attr.name, attr.value]);
  } else if (attrs) {
    pairs = Object.entries(attrs) as [string, string][];
  } else {
    pairs = [];
  }

  // `pairs` is always a fresh array, so sorting in place never touches the caller's data.
  pairs.sort(([left], [right]) => left.localeCompare(right));

  let markup = '';
  for (const [name, value] of pairs) {
    markup += ' ' + serializeAttribute(name, value, options);
  }
  return markup;
}
|
|
62
|
+
|
|
63
|
+
/** End tags that may be dropped when directly followed by another end tag or by the end of the stream. */
const END_TAGS_OMITTED_BEFORE_CLOSE: readonly string[] = ['p', 'li', 'option', 'optgroup', 'tbody', 'tfoot', 'tr', 'td', 'th', 'colgroup', 'dd'];

/** Start tags that implicitly close an open `<p>`, making its end tag optional. */
const P_CLOSING_START_TAGS: readonly string[] = ['address', 'article', 'aside', 'blockquote', 'datagrid', 'dialog', 'dir', 'div', 'dl', 'fieldset', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hr', 'menu', 'nav', 'ol', 'p', 'pre', 'section', 'table', 'ul'];

/** Returns the tag name of a StartTag/EndTag/EmptyTag token, or `null` for anything else. */
function tokenTagName(token: any): string | null {
  if (!token) return null;
  if (token[0] === 'StartTag' || token[0] === 'EndTag') return token[2];
  if (token[0] === 'EmptyTag') return token[1];
  return null;
}

/** True when an attribute collection (array of records or plain object) has at least one entry. */
function hasAnyAttributes(attrs: any): boolean {
  if (Array.isArray(attrs)) return attrs.length > 0;
  return attrs ? Object.keys(attrs).length > 0 : false;
}

/** True for a `Characters` token that consists solely of whitespace. */
function isBlankCharacters(token: any): boolean {
  return token[0] === 'Characters' && token[1].trim() === '';
}

/** First token at or after `start` that is not whitespace-only text, or `null`. */
function firstSignificantTokenFrom(tokens: any[], start: number): any {
  for (let j = start; j < tokens.length; j++) {
    if (!isBlankCharacters(tokens[j])) return tokens[j];
  }
  return null;
}

/** Nearest token before `end` that is not whitespace-only text, or `null`. */
function lastSignificantTokenBefore(tokens: any[], end: number): any {
  for (let j = end - 1; j >= 0; j--) {
    if (!isBlankCharacters(tokens[j])) return tokens[j];
  }
  return null;
}

/** True when a meta attribute list contains `http-equiv=content-type` (position-independent). */
function isContentTypeMeta(attrs: any[]): boolean {
  return attrs.some((attr: any) => attr.name === 'http-equiv' && attr.value === 'content-type');
}

/**
 * Returns a copy of `tokens` in which every charset declaration inside
 * `<head>` names `encoding`; a `<meta charset>` is inserted right after the
 * head start tag when the head declares no charset at all.
 */
function injectMetaCharset(tokens: any[], encoding: string): any[] {
  // Pass 1: does any <meta> inside <head> already declare a charset?
  let inHead = false;
  let hasCharset = false;
  for (const token of tokens) {
    const type = token[0];
    if (type === 'StartTag' && token[2] === 'head') {
      inHead = true;
    } else if (type === 'EndTag' && token[2] === 'head') {
      inHead = false;
    } else if (inHead && type === 'EmptyTag' && token[1] === 'meta' && Array.isArray(token[2])) {
      const attrs: any[] = token[2];
      if (attrs.some((attr: any) => attr.name === 'charset')) {
        hasCharset = true;
      }
      if (isContentTypeMeta(attrs)) {
        const content = attrs.find((attr: any) => attr.name === 'content');
        if (content && content.value.includes('charset=')) {
          hasCharset = true;
        }
      }
    }
  }

  // Pass 2: rewrite existing declarations / insert the missing one.
  const output: any[] = [];
  inHead = false;
  for (const token of tokens) {
    const type = token[0];
    if (type === 'StartTag' && token[2] === 'head') {
      inHead = true;
      output.push(token);
      if (!hasCharset) {
        output.push(['EmptyTag', 'meta', [{ name: 'charset', value: encoding }]]);
      }
    } else if (type === 'EndTag' && token[2] === 'head') {
      inHead = false;
      output.push(token);
    } else if (inHead && type === 'EmptyTag' && token[1] === 'meta' && Array.isArray(token[2])) {
      // Decide up front so the rewrite does not depend on attribute order.
      const contentType = isContentTypeMeta(token[2]);
      const rewritten = token[2].map((attr: any) => {
        if (attr.name === 'charset') {
          return { name: 'charset', value: encoding };
        }
        if (attr.name === 'content' && contentType) {
          return { name: 'content', value: attr.value.replace(/charset=[^;]*/, 'charset=' + encoding) };
        }
        return attr;
      });
      output.push([type, token[1], rewritten]);
    } else {
      output.push(token);
    }
  }
  return output;
}

/**
 * Decides whether the optional start tag at `tokens[index]` can be left out.
 * Only `html`, `head`, `body`, `colgroup` and `tbody` are candidates, and a
 * start tag carrying attributes is never omitted (that would lose them).
 */
function canOmitStartTag(tokens: any[], index: number): boolean {
  const name = tokens[index][2];
  const attrs = tokens[index][3];
  if (!['html', 'head', 'body', 'colgroup', 'tbody'].includes(name)) return false;
  if (hasAnyAttributes(attrs)) return false;

  const first = firstSignificantTokenFrom(tokens, index + 1);
  const firstType = first ? first[0] : null;

  switch (name) {
    case 'html':
      if (firstType === 'Comment') return false;
      if (firstType === 'Characters') return !/^\s/.test(first[1]);
      return true;
    case 'head':
      return firstType === 'StartTag'
        || firstType === 'EmptyTag'
        || (firstType === 'EndTag' && first[2] === 'head');
    case 'body':
      if (firstType === null || firstType === 'StartTag' || firstType === 'EndTag') return true;
      return firstType === 'Characters' && !/^\s/.test(first[1]);
    case 'colgroup':
      return (firstType === 'StartTag' || firstType === 'EmptyTag') && tokenTagName(first) === 'col';
    case 'tbody': {
      if (firstType !== 'StartTag' || first[2] !== 'tr') return false;
      // Must not directly follow the end of another row group, otherwise the
      // rows would be absorbed into that group when the markup is re-parsed.
      const previous = lastSignificantTokenBefore(tokens, index);
      const followsRowGroup = previous !== null
        && previous[0] === 'EndTag'
        && ['tbody', 'thead', 'tfoot'].includes(previous[2]);
      return !followsRowGroup;
    }
    default:
      return false;
  }
}

/**
 * Decides whether the end tag `name` can be left out given the token that
 * immediately follows it (`undefined` at the end of the stream).
 */
function canOmitEndTag(name: string, nextToken: any): boolean {
  if (name === 'html' || name === 'head' || name === 'body') {
    return !nextToken
      || nextToken[0] === 'StartTag'
      || nextToken[0] === 'EndTag'
      || (nextToken[0] === 'Characters' && !/^\s/.test(nextToken[1]));
  }
  if (!nextToken) {
    return END_TAGS_OMITTED_BEFORE_CLOSE.includes(name);
  }

  const nextType = nextToken[0];
  const nextName = tokenTagName(nextToken);

  if (nextType === 'EndTag') {
    return END_TAGS_OMITTED_BEFORE_CLOSE.includes(name);
  }
  if (nextType === 'EmptyTag') {
    return name === 'p' && nextName === 'hr';
  }
  if (nextType === 'Characters') {
    return name === 'colgroup' && !/^\s/.test(nextToken[1]);
  }
  if (nextType !== 'StartTag') {
    return false;
  }

  switch (name) {
    case 'p':
      return nextName !== null && P_CLOSING_START_TAGS.includes(nextName);
    case 'li':
      return nextName === 'li';
    case 'dt':
    case 'dd':
      return nextName === 'dt' || nextName === 'dd';
    case 'option':
      return nextName === 'option' || nextName === 'optgroup';
    case 'optgroup':
      return nextName === 'optgroup';
    case 'tbody':
    case 'tfoot':
    case 'thead':
      return nextName === 'tbody' || nextName === 'tfoot';
    case 'tr':
      return nextName === 'tr';
    case 'td':
    case 'th':
      return nextName === 'td' || nextName === 'th';
    case 'colgroup':
      return nextName !== 'colgroup';
    default:
      return false;
  }
}

/**
 * Serializes a list of html5lib-style tokens to an HTML string, omitting
 * optional start/end tags where that is safe.
 *
 * Token shapes read by this function:
 * `['StartTag', namespace, name, attrs]`, `['EndTag', namespace, name]`,
 * `['EmptyTag', name, attrs]`, `['Characters', text]`, `['Comment', text]`,
 * `['Doctype', name, publicId?, systemId?]`. Unknown token types are ignored.
 *
 * Options:
 * - `inject_meta_charset` + `encoding`: make every charset declaration inside
 *   `<head>` name `encoding`, inserting `<meta charset>` if none exists.
 *   Nothing is changed when `encoding` is not provided.
 * - `quote_char`, `quote_attr_values`, `minimize_boolean_attributes`,
 *   `escape_lt_in_attrs`: forwarded to attribute serialization.
 * - `use_trailing_solidus`: emit void elements as `<name ... />`.
 * - `escape_rcdata`: entity-escape text inside `script`, `style` and `textarea`
 *   instead of emitting it verbatim.
 * - `strip_whitespace`: collapse whitespace runs to one space outside
 *   `pre`, `textarea`, `script` and `style`.
 *
 * @param tokens Token list to serialize; it is not mutated.
 * @param options Serializer options (all optional).
 * @returns The serialized HTML.
 */
export function serializeTokens(tokens: any[], options?: { inject_meta_charset?: boolean; encoding?: string; quote_char?: string; quote_attr_values?: boolean; minimize_boolean_attributes?: boolean; escape_lt_in_attrs?: boolean; use_trailing_solidus?: boolean; escape_rcdata?: boolean; strip_whitespace?: boolean }): string {
  const encoding = options?.encoding;
  const processedTokens = options?.inject_meta_charset && encoding
    ? injectMetaCharset(tokens, encoding)
    : tokens;

  let result = '';
  let inScript = false;
  let inStyle = false;
  let inPre = false;
  let inTextarea = false;

  for (let i = 0; i < processedTokens.length; i++) {
    const token = processedTokens[i];

    switch (token[0]) {
      case 'StartTag': {
        const name = token[2];
        const attrs = token[3];
        // Optional start tags are evaluated per element, not once per document.
        if (canOmitStartTag(processedTokens, i)) break;
        if (name === 'pre') inPre = true;
        if (name === 'textarea') inTextarea = true;
        if (name === 'script') inScript = true;
        if (name === 'style') inStyle = true;
        result += '<' + name + serializeAttributes(attrs, options) + '>';
        break;
      }
      case 'EmptyTag': {
        const name = token[1];
        const attrs = token[2];
        result += '<' + name + serializeAttributes(attrs, options) + (options?.use_trailing_solidus ? ' />' : '>');
        break;
      }
      case 'EndTag': {
        const name = token[2];
        if (canOmitEndTag(name, processedTokens[i + 1])) break;
        if (name === 'script') inScript = false;
        if (name === 'pre') inPre = false;
        if (name === 'textarea') inTextarea = false;
        if (name === 'style') inStyle = false;
        result += '</' + name + '>';
        break;
      }
      case 'Characters': {
        let text = token[1];
        const whitespaceSensitive = inPre || inTextarea || inScript || inStyle;
        if (options?.strip_whitespace && !whitespaceSensitive) {
          text = text.replace(/\s+/g, ' ');
        }
        // script/style are raw-text elements: entities are not decoded there,
        // so escaping would corrupt the content.
        // NOTE(review): textarea is also emitted verbatim unless escape_rcdata
        // is set (kept from the original); its content is entity-decoded by
        // parsers, so confirm whether it should always be escaped.
        const verbatim = (inScript || inStyle || inTextarea) && !options?.escape_rcdata;
        result += verbatim ? text : escapeText(text);
        break;
      }
      case 'Doctype': {
        result += '<!DOCTYPE ' + token[1];
        if (token[2]) {
          result += ' PUBLIC "' + token[2] + '"';
          if (token[3]) result += ' "' + token[3] + '"';
        } else if (token[3]) {
          result += ' SYSTEM "' + token[3] + '"';
        }
        result += '>';
        break;
      }
      case 'Comment': {
        result += '<!--' + token[1] + '-->';
        break;
      }
      default:
        // Ignore unknown tokens
        break;
    }
  }

  return result;
}
|