@tkeron/html-parser 1.1.2 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/npm_deploy.yml +14 -4
- package/README.md +6 -6
- package/bun.lock +6 -8
- package/check-versions.ts +147 -0
- package/index.ts +4 -8
- package/package.json +5 -6
- package/src/dom-simulator/append-child.ts +130 -0
- package/src/dom-simulator/append.ts +18 -0
- package/src/dom-simulator/attributes.ts +23 -0
- package/src/dom-simulator/clone-node.ts +51 -0
- package/src/dom-simulator/convert-ast-node-to-dom.ts +37 -0
- package/src/dom-simulator/create-cdata.ts +18 -0
- package/src/dom-simulator/create-comment.ts +23 -0
- package/src/dom-simulator/create-doctype.ts +24 -0
- package/src/dom-simulator/create-document.ts +81 -0
- package/src/dom-simulator/create-element.ts +195 -0
- package/src/dom-simulator/create-processing-instruction.ts +19 -0
- package/src/dom-simulator/create-temp-parent.ts +9 -0
- package/src/dom-simulator/create-text-node.ts +23 -0
- package/src/dom-simulator/escape-text-content.ts +6 -0
- package/src/dom-simulator/find-special-elements.ts +14 -0
- package/src/dom-simulator/get-text-content.ts +18 -0
- package/src/dom-simulator/index.ts +36 -0
- package/src/dom-simulator/inner-outer-html.ts +182 -0
- package/src/dom-simulator/insert-after.ts +20 -0
- package/src/dom-simulator/insert-before.ts +108 -0
- package/src/dom-simulator/matches.ts +26 -0
- package/src/dom-simulator/node-types.ts +26 -0
- package/src/dom-simulator/prepend.ts +24 -0
- package/src/dom-simulator/remove-child.ts +68 -0
- package/src/dom-simulator/remove.ts +7 -0
- package/src/dom-simulator/replace-child.ts +152 -0
- package/src/dom-simulator/set-text-content.ts +33 -0
- package/src/dom-simulator/update-element-content.ts +56 -0
- package/src/dom-simulator.ts +12 -1126
- package/src/encoding/constants.ts +8 -0
- package/src/encoding/detect-encoding.ts +21 -0
- package/src/encoding/index.ts +1 -0
- package/src/encoding/normalize-encoding.ts +6 -0
- package/src/html-entities.ts +2127 -0
- package/src/index.ts +5 -5
- package/src/parser/adoption-agency-helpers.ts +145 -0
- package/src/parser/constants.ts +137 -0
- package/src/parser/dom-to-ast.ts +79 -0
- package/src/parser/index.ts +9 -0
- package/src/parser/parse.ts +772 -0
- package/src/parser/types.ts +56 -0
- package/src/selectors/find-elements-descendant.ts +47 -0
- package/src/selectors/index.ts +2 -0
- package/src/selectors/matches-selector.ts +12 -0
- package/src/selectors/matches-token.ts +27 -0
- package/src/selectors/parse-selector.ts +48 -0
- package/src/selectors/query-selector-all.ts +43 -0
- package/src/selectors/query-selector.ts +6 -0
- package/src/selectors/types.ts +10 -0
- package/src/serializer/attributes.ts +74 -0
- package/src/serializer/escape.ts +13 -0
- package/src/serializer/index.ts +1 -0
- package/src/serializer/serialize-tokens.ts +511 -0
- package/src/tokenizer/calculate-position.ts +10 -0
- package/src/tokenizer/constants.ts +11 -0
- package/src/tokenizer/decode-entities.ts +64 -0
- package/src/tokenizer/index.ts +2 -0
- package/src/tokenizer/parse-attributes.ts +74 -0
- package/src/tokenizer/tokenize.ts +165 -0
- package/src/tokenizer/types.ts +25 -0
- package/tests/adoption-agency-helpers.test.ts +304 -0
- package/tests/advanced.test.ts +242 -221
- package/tests/cloneNode.test.ts +19 -66
- package/tests/custom-elements-head.test.ts +54 -55
- package/tests/dom-extended.test.ts +77 -64
- package/tests/dom-manipulation.test.ts +51 -24
- package/tests/dom.test.ts +15 -13
- package/tests/encoding/detect-encoding.test.ts +33 -0
- package/tests/google-dom.test.ts +2 -2
- package/tests/helpers/tokenizer-adapter.test.ts +29 -43
- package/tests/helpers/tokenizer-adapter.ts +36 -33
- package/tests/helpers/tree-adapter.test.ts +20 -20
- package/tests/helpers/tree-adapter.ts +34 -24
- package/tests/html-entities-text.test.ts +6 -2
- package/tests/innerhtml-void-elements.test.ts +52 -36
- package/tests/outerHTML-replacement.test.ts +37 -65
- package/tests/parser/dom-to-ast.test.ts +109 -0
- package/tests/parser/parse.test.ts +139 -0
- package/tests/parser.test.ts +281 -217
- package/tests/selectors/query-selector-all.test.ts +39 -0
- package/tests/selectors/query-selector.test.ts +42 -0
- package/tests/serializer/attributes.test.ts +132 -0
- package/tests/serializer/escape.test.ts +51 -0
- package/tests/serializer/serialize-tokens.test.ts +80 -0
- package/tests/serializer-core.test.ts +6 -6
- package/tests/serializer-injectmeta.test.ts +6 -6
- package/tests/serializer-optionaltags.test.ts +9 -6
- package/tests/serializer-options.test.ts +6 -6
- package/tests/serializer-whitespace.test.ts +6 -6
- package/tests/tokenizer/calculate-position.test.ts +34 -0
- package/tests/tokenizer/decode-entities.test.ts +31 -0
- package/tests/tokenizer/parse-attributes.test.ts +44 -0
- package/tests/tokenizer/tokenize.test.ts +757 -0
- package/tests/tokenizer-namedEntities.test.ts +10 -7
- package/tests/tokenizer-pendingSpecChanges.test.ts +10 -7
- package/tests/tokenizer.test.ts +268 -256
- package/tests/tree-construction-adoption01.test.ts +25 -16
- package/tests/tree-construction-adoption02.test.ts +30 -19
- package/tests/tree-construction-domjs-unsafe.test.ts +6 -4
- package/tests/tree-construction-entities02.test.ts +18 -16
- package/tests/tree-construction-html5test-com.test.ts +16 -10
- package/tests/tree-construction-math.test.ts +11 -9
- package/tests/tree-construction-namespace-sensitivity.test.ts +11 -9
- package/tests/tree-construction-noscript01.test.ts +11 -9
- package/tests/tree-construction-ruby.test.ts +6 -4
- package/tests/tree-construction-scriptdata01.test.ts +6 -4
- package/tests/tree-construction-svg.test.ts +6 -4
- package/tests/tree-construction-template.test.ts +6 -4
- package/tests/tree-construction-tests10.test.ts +6 -4
- package/tests/tree-construction-tests11.test.ts +6 -4
- package/tests/tree-construction-tests20.test.ts +7 -4
- package/tests/tree-construction-tests21.test.ts +7 -4
- package/tests/tree-construction-tests23.test.ts +7 -4
- package/tests/tree-construction-tests24.test.ts +7 -4
- package/tests/tree-construction-tests5.test.ts +6 -5
- package/tests/tree-construction-tests6.test.ts +6 -5
- package/tests/tree-construction-tests_innerHTML_1.test.ts +6 -5
- package/tests/void-elements.test.ts +85 -40
- package/tsconfig.json +1 -1
- package/src/css-selector.ts +0 -185
- package/src/encoding.ts +0 -39
- package/src/parser.ts +0 -682
- package/src/serializer.ts +0 -450
- package/src/tokenizer.ts +0 -325
- package/tests/selectors.test.ts +0 -128
package/src/serializer.ts
DELETED
|
@@ -1,450 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Serializes a list of HTML5 tokens to an HTML string.
|
|
3
|
-
* Based on HTML5 serialization algorithm.
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
/**
 * Escapes the characters that are significant in HTML text content.
 *
 * `&` is replaced first so that the ampersands introduced by the `<` and `>`
 * replacements are not escaped a second time.
 *
 * @param text Raw text.
 * @returns The text with `&`, `<` and `>` replaced by `&amp;`, `&lt;` and `&gt;`.
 */
function escapeText(text: string): string {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}
|
|
9
|
-
|
|
10
|
-
/**
 * Escapes a string so it can be placed inside a quoted attribute value,
 * whichever quote character surrounds it.
 *
 * `&` is replaced first so the ampersands of the generated entities are not
 * escaped again.
 *
 * @param value Raw attribute value.
 * @returns The value with `&`, `"` and `'` replaced by `&amp;`, `&quot;` and `&#39;`.
 */
function escapeAttributeValue(value: string): string {
  return value
    .replace(/&/g, '&amp;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}
|
|
13
|
-
|
|
14
|
-
/**
 * Decides whether an attribute value has to be wrapped in quotes.
 *
 * @param value Attribute value to inspect.
 * @returns `true` for the empty string, or when the value contains a tab,
 *   line feed, carriage return, form feed, space, `"`, `'`, `=`, a backtick
 *   or `>`; `false` otherwise.
 */
function needsQuotes(value: string): boolean {
  if (value === '') {
    return true;
  }
  return value.search(/[\t\n\r\f "'=`>]/) !== -1;
}
|
|
17
|
-
|
|
18
|
-
/**
 * Serializes a single attribute as `name`, `name=value` or a quoted
 * `name="value"` / `name='value'` pair.
 *
 * @param name Attribute name, written verbatim.
 * @param value Attribute value.
 * @param options Serialization switches read here:
 *   - `minimize_boolean_attributes`: unless explicitly `false`, an attribute
 *     whose value equals its name is written as the bare name.
 *   - `quote_attr_values`: quote the value even when `needsQuotes` says it is
 *     not required.
 *   - `escape_lt_in_attrs`: additionally escape `<` as `&lt;` in quoted values.
 *   - `quote_char`: force this quote character; the matching character inside
 *     the value is escaped (`'` as `&#39;`, anything else as `&quot;`).
 * @returns The serialized attribute without a leading space.
 */
function serializeAttribute(name: string, value: string, options?: { quote_char?: string; quote_attr_values?: boolean; minimize_boolean_attributes?: boolean; escape_lt_in_attrs?: boolean; escape_rcdata?: boolean }): string {
  if (options?.minimize_boolean_attributes !== false && value === name) {
    return name;
  }

  // NOTE(review): the unquoted form is written without escaping `&`, exactly
  // as before - confirm this is intended for values such as `&lt;`.
  if (!needsQuotes(value) && !options?.quote_attr_values) {
    return `${name}=${value}`;
  }

  // `&` must be escaped before any other entity is introduced.
  let escaped = value.replace(/&/g, '&amp;');
  if (options?.escape_lt_in_attrs) {
    escaped = escaped.replace(/</g, '&lt;');
  }

  const forcedQuote = options?.quote_char;
  if (forcedQuote) {
    escaped = forcedQuote === "'"
      ? escaped.replace(/'/g, '&#39;')
      : escaped.replace(/"/g, '&quot;');
    return `${name}=${forcedQuote}${escaped}${forcedQuote}`;
  }

  // Auto-select the quote: single quotes only when the value contains `"` but
  // no `'`, so that nothing needs escaping; double quotes in every other case.
  if (value.includes('"') && !value.includes("'")) {
    return `${name}='${escaped}'`;
  }
  return `${name}="${escaped.replace(/"/g, '&quot;')}"`;
}
|
|
51
|
-
|
|
52
|
-
/**
 * Serializes a whole attribute collection for placement after a tag name.
 *
 * @param attrs Either an array of objects with `name` and `value` properties,
 *   or a plain object mapping names to values; a falsy value means "no attributes".
 * @param options Passed through unchanged to `serializeAttribute`.
 * @returns Each attribute preceded by one space, ordered by name using
 *   `localeCompare`; the empty string when there are no attributes.
 */
function serializeAttributes(attrs: any, options?: { quote_char?: string; quote_attr_values?: boolean; minimize_boolean_attributes?: boolean; escape_lt_in_attrs?: boolean; use_trailing_solidus?: boolean; escape_rcdata?: boolean }): string {
  const pairs: [string, string][] = Array.isArray(attrs)
    ? attrs.map((attr: any) => [attr.name, attr.value])
    : attrs
      ? Object.entries(attrs)
      : [];

  // Sorting works on the freshly built pair list, never on the caller's data.
  pairs.sort((left, right) => left[0].localeCompare(right[0]));

  let serialized = '';
  for (const [attrName, attrValue] of pairs) {
    serialized += ' ' + serializeAttribute(attrName, attrValue, options);
  }
  return serialized;
}
|
|
62
|
-
|
|
63
|
-
/** End tags that are dropped when the next token is an end tag or the stream ends. */
const END_TAGS_OMITTED_AT_CLOSE = ['p', 'li', 'option', 'optgroup', 'tbody', 'tfoot', 'tr', 'td', 'th', 'colgroup', 'dd'];

/** Start tags that allow a directly preceding `</p>` to be dropped. */
const START_TAGS_CLOSING_P = ['address', 'article', 'aside', 'blockquote', 'datagrid', 'dialog', 'dir', 'div', 'dl', 'fieldset', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hr', 'menu', 'nav', 'ol', 'p', 'pre', 'section', 'table', 'ul'];

/** True when an attribute collection (array or plain-object form) has at least one entry. */
function hasAnyAttributes(attrs: any): boolean {
  if (Array.isArray(attrs)) {
    return attrs.length > 0;
  }
  return attrs ? Object.keys(attrs).length > 0 : false;
}

/** Converts an attribute collection (array or plain-object form) into an array of `{ name, value }` objects. */
function toAttributeList(attrs: any): any[] {
  if (Array.isArray(attrs)) {
    return attrs;
  }
  return attrs ? Object.entries(attrs).map(([name, value]) => ({ name, value })) : [];
}

/** Tag name of a StartTag/EndTag token (index 2) or an EmptyTag token (index 1); `null` for other tokens. */
function tokenTagName(token: any): any {
  const type = token[0];
  if (type === 'StartTag' || type === 'EndTag') {
    return token[2];
  }
  return type === 'EmptyTag' ? token[1] : null;
}

/** First token after `index` that is not a whitespace-only Characters token, or `null` when there is none. */
function firstSignificantTokenAfter(tokens: any[], index: number): any {
  for (let j = index + 1; j < tokens.length; j++) {
    const candidate = tokens[j];
    if (candidate[0] !== 'Characters' || candidate[1].trim() !== '') {
      return candidate;
    }
  }
  return null;
}

/**
 * Returns a copy of the token list in which the charset declared by `meta`
 * tags inside `head` is rewritten to `encoding`, and a `meta charset` tag is
 * inserted right after the `head` start tag when no charset was declared.
 * Without an `encoding` nothing is rewritten or inserted.
 */
function injectMetaCharset(tokens: any[], encoding?: string): any[] {
  const isHeadTag = (token: any, type: string) => token[0] === type && token[2] === 'head';
  const isMetaTag = (token: any) => token[0] === 'EmptyTag' && token[1] === 'meta';

  // First pass: find out whether the head already declares a charset.
  let inHead = false;
  let hasCharset = false;
  for (const token of tokens) {
    if (isHeadTag(token, 'StartTag')) {
      inHead = true;
    } else if (isHeadTag(token, 'EndTag')) {
      inHead = false;
    } else if (inHead && isMetaTag(token)) {
      const attrs = toAttributeList(token[2]);
      if (attrs.some((attr: any) => attr.name === 'charset')) {
        hasCharset = true;
      }
      const isContentType = attrs.some((attr: any) => attr.name === 'http-equiv' && attr.value === 'content-type');
      const contentAttr = attrs.find((attr: any) => attr.name === 'content');
      if (isContentType && contentAttr && contentAttr.value.includes('charset=')) {
        hasCharset = true;
      }
    }
  }

  // Second pass: build the rewritten token list.
  const rewritten: any[] = [];
  inHead = false;
  for (const token of tokens) {
    if (isHeadTag(token, 'StartTag')) {
      inHead = true;
      rewritten.push(token);
      if (!hasCharset && encoding) {
        rewritten.push(['EmptyTag', 'meta', [{ name: 'charset', value: encoding }]]);
      }
    } else if (isHeadTag(token, 'EndTag')) {
      inHead = false;
      rewritten.push(token);
    } else if (inHead && isMetaTag(token)) {
      const newAttrs = [...toAttributeList(token[2])];
      // `content` is only rewritten when `http-equiv` appeared earlier in the list.
      let sawContentType = false;
      for (let i = 0; i < newAttrs.length; i++) {
        const attr = newAttrs[i];
        if (attr.name === 'charset' && encoding) {
          newAttrs[i] = { name: 'charset', value: encoding };
        } else if (attr.name === 'http-equiv' && attr.value === 'content-type') {
          sawContentType = true;
        } else if (attr.name === 'content' && sawContentType && encoding) {
          newAttrs[i] = { name: 'content', value: attr.value.replace(/charset=[^;]*/, 'charset=' + encoding) };
        }
      }
      rewritten.push([token[0], token[1], newAttrs]);
    } else {
      rewritten.push(token);
    }
  }
  return rewritten;
}

/**
 * Decides, from the last `html`, `head`, `body` and `colgroup` start tag in
 * the list, whether start tags with those names are left out of the output.
 */
function detectOmittedStartTags(tokens: any[]): { html: boolean; head: boolean; body: boolean; colgroup: boolean } {
  // Later occurrences overwrite earlier ones, so each entry is the last index.
  const lastStartIndex = new Map<string, number>();
  tokens.forEach((token, index) => {
    if (token[0] === 'StartTag') {
      lastStartIndex.set(token[2], index);
    }
  });

  const startsWithWhitespace = (token: any) => /^\s/.test(token[1]);
  const omitted = { html: false, head: false, body: false, colgroup: false };

  const htmlIndex = lastStartIndex.get('html');
  if (htmlIndex !== undefined && !hasAnyAttributes(tokens[htmlIndex][3])) {
    const first = firstSignificantTokenAfter(tokens, htmlIndex);
    omitted.html = !first
      || (first[0] !== 'Comment' && !(first[0] === 'Characters' && startsWithWhitespace(first)));
  }

  const headIndex = lastStartIndex.get('head');
  if (headIndex !== undefined) {
    const first = firstSignificantTokenAfter(tokens, headIndex);
    omitted.head = Boolean(first)
      && (first[0] === 'StartTag' || first[0] === 'EmptyTag' || (first[0] === 'EndTag' && first[2] === 'head'));
  }

  const bodyIndex = lastStartIndex.get('body');
  if (bodyIndex !== undefined) {
    const first = firstSignificantTokenAfter(tokens, bodyIndex);
    omitted.body = !first
      || first[0] === 'StartTag'
      || first[0] === 'EndTag'
      || (first[0] === 'Characters' && !startsWithWhitespace(first));
  }

  const colgroupIndex = lastStartIndex.get('colgroup');
  if (colgroupIndex !== undefined) {
    const first = firstSignificantTokenAfter(tokens, colgroupIndex);
    omitted.colgroup = !hasAnyAttributes(tokens[colgroupIndex][3])
      && Boolean(first)
      && (first[0] === 'StartTag' || first[0] === 'EmptyTag')
      && tokenTagName(first) === 'col';
  }

  return omitted;
}

/**
 * Decides whether the `tbody` start tag at `index` is left out: it must have
 * no attributes, be followed by a `tr` tag, and the first significant token of
 * the whole list must not be a `tbody`/`thead`/`tfoot` end tag.
 */
function shouldOmitTbodyStart(tokens: any[], index: number, attrs: any): boolean {
  if (hasAnyAttributes(attrs)) {
    return false;
  }
  const first = firstSignificantTokenAfter(tokens, index);
  const opensWithTr = Boolean(first)
    && (first[0] === 'StartTag' || first[0] === 'EmptyTag')
    && tokenTagName(first) === 'tr';
  if (!opensWithTr) {
    return false;
  }
  // Only the first significant token of the list is inspected.
  for (let j = 0; j < index; j++) {
    const earlier = tokens[j];
    if (earlier[0] === 'Characters' && earlier[1].trim() === '') {
      continue;
    }
    return !(earlier[0] === 'EndTag' && ['tbody', 'thead', 'tfoot'].includes(earlier[2]));
  }
  return true;
}

/** Decides whether the end tag `name` may be dropped because the start tag `nextName` follows it. */
function startTagImpliesEnd(name: any, nextName: any): boolean {
  switch (name) {
    case 'p':
      return START_TAGS_CLOSING_P.includes(nextName);
    case 'li':
      return nextName === 'li';
    case 'dt':
    case 'dd':
      return nextName === 'dt' || nextName === 'dd';
    case 'option':
      return nextName === 'option' || nextName === 'optgroup';
    case 'optgroup':
      return nextName === 'optgroup';
    case 'tbody':
    case 'tfoot':
    case 'thead':
      return nextName === 'tbody' || nextName === 'tfoot';
    case 'tr':
      return nextName === 'tr';
    case 'td':
    case 'th':
      return nextName === 'td' || nextName === 'th';
    case 'colgroup':
      return nextName !== 'colgroup';
    default:
      return false;
  }
}

/** Decides whether the end tag `name` is left out, given the token that directly follows it (if any). */
function shouldOmitEndTag(name: any, nextToken: any): boolean {
  if (name === 'html' || name === 'head' || name === 'body') {
    if (!nextToken) {
      return true;
    }
    const followingType = nextToken[0];
    return followingType === 'StartTag'
      || followingType === 'EndTag'
      || (followingType === 'Characters' && !/^\s/.test(nextToken[1]));
  }
  if (!nextToken) {
    return END_TAGS_OMITTED_AT_CLOSE.includes(name);
  }
  switch (nextToken[0]) {
    case 'EndTag':
      return END_TAGS_OMITTED_AT_CLOSE.includes(name);
    case 'StartTag':
      return startTagImpliesEnd(name, tokenTagName(nextToken));
    case 'EmptyTag':
      return name === 'p' && tokenTagName(nextToken) === 'hr';
    case 'Characters':
      return name === 'colgroup' && !/^\s/.test(nextToken[1]);
    default:
      return false;
  }
}

/**
 * Serializes a list of tokens to an HTML string, leaving out optional tags.
 *
 * Token shapes read by this function:
 *   - `['StartTag', <not read>, name, attrs]`
 *   - `['EndTag', <not read>, name]`
 *   - `['EmptyTag', name, attrs]`
 *   - `['Characters', text]`
 *   - `['Doctype', name, publicId?, systemId?]`
 *   - `['Comment', text]`
 * Tokens of any other type are ignored.
 *
 * @param tokens Tokens to serialize; the array is not modified.
 * @param options Serialization switches:
 *   - `inject_meta_charset` / `encoding`: rewrite or insert the `meta` charset
 *     declaration inside `head` before serializing.
 *   - `use_trailing_solidus`: close EmptyTag tokens with ` />` instead of `>`.
 *   - `strip_whitespace`: collapse whitespace runs in text to a single space,
 *     except inside `pre`, `textarea`, `script` and `style`.
 *   - `escape_rcdata`: escape text inside `script` and `textarea` as well.
 *   - the remaining fields are passed on to `serializeAttributes`.
 * @returns The serialized HTML.
 */
export function serializeTokens(tokens: any[], options?: { inject_meta_charset?: boolean; encoding?: string; quote_char?: string; quote_attr_values?: boolean; minimize_boolean_attributes?: boolean; escape_lt_in_attrs?: boolean; use_trailing_solidus?: boolean; escape_rcdata?: boolean; strip_whitespace?: boolean }): string {
  const processedTokens = options?.inject_meta_charset
    ? injectMetaCharset(tokens, options.encoding)
    : tokens;
  const omitted = detectOmittedStartTags(processedTokens);

  let result = '';
  let inScript = false;
  let inPre = false;
  let inTextarea = false;
  let inStyle = false;
  // NOTE(review): with `inject_meta_charset` the head start/end tags are never
  // written, and after the head end tag only EmptyTag tokens still produce
  // output. This mirrors the previous behavior - confirm it is intended.
  let serializingHead = true;

  for (let i = 0; i < processedTokens.length; i++) {
    const token = processedTokens[i];
    switch (token[0]) {
      case 'StartTag': {
        const [, , name, attrs] = token;
        if (name === 'colgroup' && omitted.colgroup) continue;
        if (name === 'tbody' && shouldOmitTbodyStart(processedTokens, i, attrs)) continue;
        if (name === 'head' && omitted.head) continue;
        if (name === 'body' && omitted.body) continue;
        if (name === 'html' && omitted.html) continue;

        if (name === 'pre') inPre = true;
        if (name === 'textarea') inTextarea = true;
        if (name === 'script') inScript = true;
        if (name === 'style') inStyle = true;

        if (name === 'head' && options?.inject_meta_charset) {
          serializingHead = true;
        } else if (name === 'head' || serializingHead) {
          result += '<' + name + serializeAttributes(attrs, options) + '>';
        }
        break;
      }
      case 'EmptyTag': {
        const [, name, attrs] = token;
        result += '<' + name + serializeAttributes(attrs, options) + (options?.use_trailing_solidus ? ' />' : '>');
        break;
      }
      case 'EndTag': {
        const name = token[2];
        // An omitted end tag also leaves the raw-text flags untouched.
        if (shouldOmitEndTag(name, processedTokens[i + 1])) continue;

        if (name === 'script') inScript = false;
        if (name === 'pre') inPre = false;
        if (name === 'textarea') inTextarea = false;
        if (name === 'style') inStyle = false;

        if (name === 'head' && options?.inject_meta_charset) {
          serializingHead = false;
        } else if (name === 'head' || serializingHead) {
          result += '</' + name + '>';
        }
        break;
      }
      case 'Characters': {
        if (!serializingHead) break;
        let text = token[1];
        if (options?.strip_whitespace && !inPre && !inTextarea && !inScript && !inStyle) {
          text = text.replace(/\s+/g, ' ');
        }
        // Text inside script/textarea is written raw unless escape_rcdata is set.
        const writeRaw = (inScript || inTextarea) && !options?.escape_rcdata;
        result += writeRaw ? text : escapeText(text);
        break;
      }
      case 'Doctype': {
        if (!serializingHead) break;
        result += '<!DOCTYPE ' + token[1];
        if (token[2]) {
          result += ' PUBLIC "' + token[2] + '"';
          if (token[3]) result += ' "' + token[3] + '"';
        } else if (token[3]) {
          result += ' SYSTEM "' + token[3] + '"';
        }
        result += '>';
        break;
      }
      case 'Comment': {
        if (serializingHead) {
          result += '<!--' + token[1] + '-->';
        }
        break;
      }
      default:
        // Ignore unknown tokens
        break;
    }
  }

  return result;
}
|