@tkeron/html-parser 1.1.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/.github/workflows/npm_deploy.yml +14 -4
  2. package/README.md +6 -6
  3. package/bun.lock +6 -8
  4. package/check-versions.ts +147 -0
  5. package/index.ts +4 -8
  6. package/package.json +5 -6
  7. package/src/dom-simulator/append-child.ts +130 -0
  8. package/src/dom-simulator/append.ts +18 -0
  9. package/src/dom-simulator/attributes.ts +23 -0
  10. package/src/dom-simulator/clone-node.ts +51 -0
  11. package/src/dom-simulator/convert-ast-node-to-dom.ts +37 -0
  12. package/src/dom-simulator/create-cdata.ts +18 -0
  13. package/src/dom-simulator/create-comment.ts +23 -0
  14. package/src/dom-simulator/create-doctype.ts +24 -0
  15. package/src/dom-simulator/create-document.ts +81 -0
  16. package/src/dom-simulator/create-element.ts +195 -0
  17. package/src/dom-simulator/create-processing-instruction.ts +19 -0
  18. package/src/dom-simulator/create-temp-parent.ts +9 -0
  19. package/src/dom-simulator/create-text-node.ts +23 -0
  20. package/src/dom-simulator/escape-text-content.ts +6 -0
  21. package/src/dom-simulator/find-special-elements.ts +14 -0
  22. package/src/dom-simulator/get-text-content.ts +18 -0
  23. package/src/dom-simulator/index.ts +36 -0
  24. package/src/dom-simulator/inner-outer-html.ts +182 -0
  25. package/src/dom-simulator/insert-after.ts +20 -0
  26. package/src/dom-simulator/insert-before.ts +108 -0
  27. package/src/dom-simulator/matches.ts +26 -0
  28. package/src/dom-simulator/node-types.ts +26 -0
  29. package/src/dom-simulator/prepend.ts +24 -0
  30. package/src/dom-simulator/remove-child.ts +68 -0
  31. package/src/dom-simulator/remove.ts +7 -0
  32. package/src/dom-simulator/replace-child.ts +152 -0
  33. package/src/dom-simulator/set-text-content.ts +33 -0
  34. package/src/dom-simulator/update-element-content.ts +56 -0
  35. package/src/dom-simulator.ts +12 -1126
  36. package/src/encoding/constants.ts +8 -0
  37. package/src/encoding/detect-encoding.ts +21 -0
  38. package/src/encoding/index.ts +1 -0
  39. package/src/encoding/normalize-encoding.ts +6 -0
  40. package/src/html-entities.ts +2127 -0
  41. package/src/index.ts +5 -5
  42. package/src/parser/adoption-agency-helpers.ts +145 -0
  43. package/src/parser/constants.ts +137 -0
  44. package/src/parser/dom-to-ast.ts +79 -0
  45. package/src/parser/index.ts +9 -0
  46. package/src/parser/parse.ts +772 -0
  47. package/src/parser/types.ts +56 -0
  48. package/src/selectors/find-elements-descendant.ts +47 -0
  49. package/src/selectors/index.ts +2 -0
  50. package/src/selectors/matches-selector.ts +12 -0
  51. package/src/selectors/matches-token.ts +27 -0
  52. package/src/selectors/parse-selector.ts +48 -0
  53. package/src/selectors/query-selector-all.ts +43 -0
  54. package/src/selectors/query-selector.ts +6 -0
  55. package/src/selectors/types.ts +10 -0
  56. package/src/serializer/attributes.ts +74 -0
  57. package/src/serializer/escape.ts +13 -0
  58. package/src/serializer/index.ts +1 -0
  59. package/src/serializer/serialize-tokens.ts +511 -0
  60. package/src/tokenizer/calculate-position.ts +10 -0
  61. package/src/tokenizer/constants.ts +11 -0
  62. package/src/tokenizer/decode-entities.ts +64 -0
  63. package/src/tokenizer/index.ts +2 -0
  64. package/src/tokenizer/parse-attributes.ts +74 -0
  65. package/src/tokenizer/tokenize.ts +165 -0
  66. package/src/tokenizer/types.ts +25 -0
  67. package/tests/adoption-agency-helpers.test.ts +304 -0
  68. package/tests/advanced.test.ts +242 -221
  69. package/tests/cloneNode.test.ts +19 -66
  70. package/tests/custom-elements-head.test.ts +54 -55
  71. package/tests/dom-extended.test.ts +77 -64
  72. package/tests/dom-manipulation.test.ts +51 -24
  73. package/tests/dom.test.ts +15 -13
  74. package/tests/encoding/detect-encoding.test.ts +33 -0
  75. package/tests/google-dom.test.ts +2 -2
  76. package/tests/helpers/tokenizer-adapter.test.ts +29 -43
  77. package/tests/helpers/tokenizer-adapter.ts +36 -33
  78. package/tests/helpers/tree-adapter.test.ts +20 -20
  79. package/tests/helpers/tree-adapter.ts +34 -24
  80. package/tests/html-entities-text.test.ts +6 -2
  81. package/tests/innerhtml-void-elements.test.ts +52 -36
  82. package/tests/outerHTML-replacement.test.ts +37 -65
  83. package/tests/parser/dom-to-ast.test.ts +109 -0
  84. package/tests/parser/parse.test.ts +139 -0
  85. package/tests/parser.test.ts +281 -217
  86. package/tests/selectors/query-selector-all.test.ts +39 -0
  87. package/tests/selectors/query-selector.test.ts +42 -0
  88. package/tests/serializer/attributes.test.ts +132 -0
  89. package/tests/serializer/escape.test.ts +51 -0
  90. package/tests/serializer/serialize-tokens.test.ts +80 -0
  91. package/tests/serializer-core.test.ts +6 -6
  92. package/tests/serializer-injectmeta.test.ts +6 -6
  93. package/tests/serializer-optionaltags.test.ts +9 -6
  94. package/tests/serializer-options.test.ts +6 -6
  95. package/tests/serializer-whitespace.test.ts +6 -6
  96. package/tests/tokenizer/calculate-position.test.ts +34 -0
  97. package/tests/tokenizer/decode-entities.test.ts +31 -0
  98. package/tests/tokenizer/parse-attributes.test.ts +44 -0
  99. package/tests/tokenizer/tokenize.test.ts +757 -0
  100. package/tests/tokenizer-namedEntities.test.ts +10 -7
  101. package/tests/tokenizer-pendingSpecChanges.test.ts +10 -7
  102. package/tests/tokenizer.test.ts +268 -256
  103. package/tests/tree-construction-adoption01.test.ts +25 -16
  104. package/tests/tree-construction-adoption02.test.ts +30 -19
  105. package/tests/tree-construction-domjs-unsafe.test.ts +6 -4
  106. package/tests/tree-construction-entities02.test.ts +18 -16
  107. package/tests/tree-construction-html5test-com.test.ts +16 -10
  108. package/tests/tree-construction-math.test.ts +11 -9
  109. package/tests/tree-construction-namespace-sensitivity.test.ts +11 -9
  110. package/tests/tree-construction-noscript01.test.ts +11 -9
  111. package/tests/tree-construction-ruby.test.ts +6 -4
  112. package/tests/tree-construction-scriptdata01.test.ts +6 -4
  113. package/tests/tree-construction-svg.test.ts +6 -4
  114. package/tests/tree-construction-template.test.ts +6 -4
  115. package/tests/tree-construction-tests10.test.ts +6 -4
  116. package/tests/tree-construction-tests11.test.ts +6 -4
  117. package/tests/tree-construction-tests20.test.ts +7 -4
  118. package/tests/tree-construction-tests21.test.ts +7 -4
  119. package/tests/tree-construction-tests23.test.ts +7 -4
  120. package/tests/tree-construction-tests24.test.ts +7 -4
  121. package/tests/tree-construction-tests5.test.ts +6 -5
  122. package/tests/tree-construction-tests6.test.ts +6 -5
  123. package/tests/tree-construction-tests_innerHTML_1.test.ts +6 -5
  124. package/tests/void-elements.test.ts +85 -40
  125. package/tsconfig.json +1 -1
  126. package/src/css-selector.ts +0 -185
  127. package/src/encoding.ts +0 -39
  128. package/src/parser.ts +0 -682
  129. package/src/serializer.ts +0 -450
  130. package/src/tokenizer.ts +0 -325
  131. package/tests/selectors.test.ts +0 -128
@@ -0,0 +1,772 @@
1
+ import type { Token } from "../tokenizer/index";
2
+ import { TokenType } from "../tokenizer/index";
3
+ import {
4
+ createDocument,
5
+ createElement,
6
+ createTextNode,
7
+ createComment,
8
+ createCDATA,
9
+ createProcessingInstruction,
10
+ createDoctype,
11
+ appendChild,
12
+ } from "../dom-simulator/index.js";
13
+ import type { ParserState } from "./types";
14
+ import { InsertionMode } from "./types";
15
+ import {
16
+ VOID_ELEMENTS,
17
+ RAW_TEXT_ELEMENTS,
18
+ AUTO_CLOSE_RULES,
19
+ FORMATTING_ELEMENTS,
20
+ TABLE_CONTEXT_ELEMENTS,
21
+ VALID_TABLE_CHILDREN,
22
+ VALID_TABLE_SECTION_CHILDREN,
23
+ VALID_TR_CHILDREN,
24
+ } from "./constants";
25
+ import {
26
+ findFormattingElementInStack,
27
+ findFurthestBlock,
28
+ getCommonAncestor,
29
+ cloneFormattingElement,
30
+ reparentChildren,
31
+ } from "./adoption-agency-helpers.js";
32
+
33
/**
 * Wraps the current children of `root` in a freshly created
 * `<html><head></head><body>…</body></html>` skeleton.
 *
 * Doctype nodes (nodeType 10) and comments (nodeType 8) that occur before the
 * first element stay directly under `root`, in that order; every other child
 * is moved into the new `<body>`.
 */
const wrapRootInHtmlSkeleton = (root: any): void => {
  const html = createElement("html", {});
  const head = createElement("head", {});
  const body = createElement("body", {});
  appendChild(html, head);
  appendChild(html, body);

  const doctypes: any[] = [];
  const commentsBeforeHtml: any[] = [];
  const bodyContent: any[] = [];
  let foundElement = false;

  // Iterate over a snapshot so later re-parenting cannot disturb the walk.
  for (const child of [...root.childNodes]) {
    if (child.nodeType === 10) {
      doctypes.push(child);
    } else if (child.nodeType === 8 && !foundElement) {
      commentsBeforeHtml.push(child);
    } else {
      if (child.nodeType === 1) foundElement = true;
      bodyContent.push(child);
    }
  }

  for (const content of bodyContent) {
    appendChild(body, content);
  }

  // Rebuild the root child list: doctypes, leading comments, then <html>.
  root.childNodes = [];
  for (const node of [...doctypes, ...commentsBeforeHtml]) {
    node.parentNode = null;
    appendChild(root, node);
  }
  appendChild(root, html);
  root.documentElement = html;
  root.head = head;
  root.body = body;
};

/**
 * Builds a document tree from a token stream.
 *
 * Tokens are consumed in order until the list ends or an EOF token is reached.
 * If no `HTML` element ended up directly under the root, an html/head/body
 * skeleton is synthesized around the existing root children. Every element
 * still open on the stack afterwards is recorded as an "Unclosed tag" error.
 *
 * @param tokens Tokens to parse.
 * @returns The document root node. Errors collected in the parser state are
 *   not part of the return value.
 */
export const parse = (tokens: Token[]): any => {
  const state = createParserState(tokens);

  while (state.position < state.length) {
    const token = getCurrentToken(state);
    if (!token || token.type === TokenType.EOF) {
      break;
    }
    parseToken(state, token);
    advance(state);
  }

  let hasHtml = false;
  for (const child of state.root.childNodes) {
    if (child.nodeType === 1 && child.tagName === "HTML") {
      hasHtml = true;
      state.root.documentElement = child;
      break;
    }
  }
  if (!hasHtml) {
    wrapRootInHtmlSkeleton(state.root);
  }

  // The position no longer moves, so the offset reported for every unclosed
  // element is the same; look it up once instead of once per stack entry.
  const errorOffset = getCurrentToken(state)?.position?.offset ?? 0;
  while (state.stack.length > 1) {
    const unclosedElement = state.stack.pop()!;
    addError(state, `Unclosed tag: ${unclosedElement.tagName}`, errorOffset);
  }

  return state.root;
};
110
+
111
/**
 * Creates the initial parser state for a token list: a new document as root
 * and sole stack entry, position 0, and the `Initial` insertion mode.
 */
const createParserState = (tokens: Token[]): ParserState => {
  const documentRoot = createDocument();
  const initialState: ParserState = {
    tokens,
    position: 0,
    length: tokens.length,
    stack: [documentRoot],
    root: documentRoot,
    insertionMode: InsertionMode.Initial,
    errors: [],
    activeFormattingElements: [],
  };
  return initialState;
};
125
+
126
/**
 * Routes a token to the handler for the parser's current insertion mode.
 * Any mode without a dedicated handler is processed with the in-body rules.
 */
const parseToken = (state: ParserState, token: Token): void => {
  const mode = state.insertionMode;

  if (mode === InsertionMode.Initial) {
    parseTokenInInitialMode(state, token);
  } else if (mode === InsertionMode.BeforeHtml) {
    parseTokenInBeforeHtmlMode(state, token);
  } else if (mode === InsertionMode.BeforeHead) {
    parseTokenInBeforeHeadMode(state, token);
  } else if (mode === InsertionMode.InHead) {
    parseTokenInInHeadMode(state, token);
  } else if (mode === InsertionMode.AfterHead) {
    parseTokenInAfterHeadMode(state, token);
  } else {
    // InBody itself and every other mode share the in-body handler.
    parseTokenInInBodyMode(state, token);
  }
};
150
+
151
/**
 * Handles a token while in the `Initial` insertion mode.
 *
 * A DOCTYPE token is inserted and switches to `BeforeHtml`; comments are
 * inserted; whitespace-only text is dropped. Any other token causes an `html`
 * doctype node to be appended to the root, switches to `BeforeHtml`, and is
 * then reprocessed.
 */
const parseTokenInInitialMode = (state: ParserState, token: Token): void => {
  if (token.type === TokenType.DOCTYPE) {
    parseDoctype(state, token);
    state.insertionMode = InsertionMode.BeforeHtml;
    return;
  }

  if (token.type === TokenType.COMMENT) {
    parseComment(state, token);
    return;
  }

  const isBlankText =
    token.type === TokenType.TEXT && token.value.trim() === "";
  if (isBlankText) {
    return;
  }

  // No doctype seen before real content: add a default one and move on.
  appendChild(state.root, createDoctype("html"));
  state.insertionMode = InsertionMode.BeforeHtml;
  parseToken(state, token);
};
165
+
166
/**
 * Handles a token while in the `BeforeHtml` insertion mode.
 *
 * An `<html>` open tag creates the root element with the token's attributes.
 * Comments are inserted; DOCTYPE tokens and whitespace-only text are ignored.
 * Any other token creates an attribute-less `<html>` element and is then
 * reprocessed in `BeforeHead` mode.
 */
const parseTokenInBeforeHtmlMode = (state: ParserState, token: Token): void => {
  const isHtmlOpenTag =
    token.type === TokenType.TAG_OPEN && token.value.toLowerCase() === "html";

  if (!isHtmlOpenTag) {
    if (token.type === TokenType.COMMENT) {
      parseComment(state, token);
      return;
    }
    if (token.type === TokenType.DOCTYPE) {
      return;
    }
    if (token.type === TokenType.TEXT && token.value.trim() === "") {
      return;
    }
  }

  const htmlElement = createElement(
    "html",
    isHtmlOpenTag ? token.attributes || {} : {},
  );
  appendChild(state.root, htmlElement);
  state.root.documentElement = htmlElement;
  state.stack.push(htmlElement);
  state.insertionMode = InsertionMode.BeforeHead;

  // An implied <html> does not consume the token that triggered it.
  if (!isHtmlOpenTag) {
    parseToken(state, token);
  }
};
189
+
190
/**
 * Handles a token while in the `BeforeHead` insertion mode.
 *
 * A `<head>` open tag creates the head element with the token's attributes and
 * marks `state.explicitHead` as true. Comments are inserted and
 * whitespace-only text is ignored. Any other token creates an implied head
 * (`explicitHead` false) and is then reprocessed in `InHead` mode.
 */
const parseTokenInBeforeHeadMode = (state: ParserState, token: Token): void => {
  const isHeadOpenTag =
    token.type === TokenType.TAG_OPEN && token.value.toLowerCase() === "head";

  if (!isHeadOpenTag) {
    if (token.type === TokenType.COMMENT) {
      parseComment(state, token);
      return;
    }
    if (token.type === TokenType.TEXT && token.value.trim() === "") {
      return;
    }
  }

  const headElement = createElement(
    "head",
    isHeadOpenTag ? token.attributes || {} : {},
  );
  appendChild(getCurrentParent(state), headElement);
  state.root.head = headElement;
  state.stack.push(headElement);
  state.insertionMode = InsertionMode.InHead;
  state.explicitHead = isHeadOpenTag;

  // An implied <head> does not consume the token that triggered it.
  if (!isHeadOpenTag) {
    parseToken(state, token);
  }
};
214
+
215
/**
 * Creates an element for an open-tag token and appends it to the current
 * parent. The element is pushed onto the stack unless the token is
 * self-closing or the tag name is in `VOID_ELEMENTS`.
 */
const parseOpenTag = (state: ParserState, token: Token): void => {
  const name = token.value.toLowerCase();
  const parent = getCurrentParent(state);
  const node = createElement(
    name,
    token.attributes || {},
    undefined,
    token.isSelfClosing,
  );
  appendChild(parent, node);

  const staysOpen = !token.isSelfClosing && !VOID_ELEMENTS.has(name);
  if (staysOpen) {
    state.stack.push(node);
  }
};
230
+
231
/**
 * Handles a token while in the `InHead` insertion mode.
 *
 * - While the current element is in `RAW_TEXT_ELEMENTS`, text is appended to
 *   it and its matching close tag pops it.
 * - Open tags for title/style/script/noscript/meta/link/base are inserted; a
 *   nested `<head>` is ignored; a tag whose name contains "-" is inserted only
 *   when the head was opened explicitly.
 * - `</head>` pops the head and switches to `AfterHead`; close tags for
 *   title/style/script/noscript or a "-" name pop only when they match the
 *   current element; all other close tags are ignored.
 * - Comments are inserted and whitespace-only text is ignored.
 * - Anything else pops the head, switches to `AfterHead`, and is reprocessed.
 */
const parseTokenInInHeadMode = (state: ParserState, token: Token): void => {
  const openTagName = getCurrentElement(state)?.tagName?.toLowerCase();

  const leaveHeadAndReprocess = (): void => {
    state.stack.pop();
    state.insertionMode = InsertionMode.AfterHead;
    parseToken(state, token);
  };

  const isTextContainer = (name: string): boolean =>
    name === "title" ||
    name === "style" ||
    name === "script" ||
    name === "noscript";

  if (RAW_TEXT_ELEMENTS.has(openTagName)) {
    if (token.type === TokenType.TEXT) {
      parseText(state, token);
      return;
    }
    if (
      token.type === TokenType.TAG_CLOSE &&
      token.value.toLowerCase() === openTagName
    ) {
      state.stack.pop();
      return;
    }
  }

  if (token.type === TokenType.TAG_OPEN) {
    const name = token.value.toLowerCase();
    const belongsInHead =
      isTextContainer(name) ||
      name === "meta" ||
      name === "link" ||
      name === "base";

    if (belongsInHead) {
      parseOpenTag(state, token);
    } else if (name === "head") {
      // A second <head> open tag is dropped.
    } else if (name.includes("-") && state.explicitHead) {
      parseOpenTag(state, token);
    } else {
      leaveHeadAndReprocess();
    }
    return;
  }

  if (token.type === TokenType.TAG_CLOSE) {
    const name = token.value.toLowerCase();
    if (name === "head") {
      state.stack.pop();
      state.insertionMode = InsertionMode.AfterHead;
    } else if (
      (isTextContainer(name) || name.includes("-")) &&
      openTagName === name
    ) {
      state.stack.pop();
    }
    return;
  }

  if (token.type === TokenType.COMMENT) {
    parseComment(state, token);
    return;
  }

  if (token.type === TokenType.TEXT && token.value.trim() === "") {
    return;
  }

  leaveHeadAndReprocess();
};
299
+
300
/**
 * Handles a token while in the `AfterHead` insertion mode.
 *
 * A `<body>` open tag creates the body element with the token's attributes.
 * Comments are inserted and whitespace-only text is ignored. Any other token
 * creates an attribute-less body and is then reprocessed in `InBody` mode.
 */
const parseTokenInAfterHeadMode = (state: ParserState, token: Token): void => {
  const isBodyOpenTag =
    token.type === TokenType.TAG_OPEN && token.value.toLowerCase() === "body";

  if (!isBodyOpenTag) {
    if (token.type === TokenType.COMMENT) {
      parseComment(state, token);
      return;
    }
    if (token.type === TokenType.TEXT && token.value.trim() === "") {
      return;
    }
  }

  const bodyElement = createElement(
    "body",
    isBodyOpenTag ? token.attributes || {} : {},
  );
  appendChild(getCurrentParent(state), bodyElement);
  state.root.body = bodyElement;
  state.stack.push(bodyElement);
  state.insertionMode = InsertionMode.InBody;

  // An implied <body> does not consume the token that triggered it.
  if (!isBodyOpenTag) {
    parseToken(state, token);
  }
};
322
+
323
const SVG_NAMESPACE = "http://www.w3.org/2000/svg";
const MATHML_NAMESPACE = "http://www.w3.org/1998/Math/MathML";

/** Elements that an unrelated end tag closes implicitly, with an error. */
const IMPLIED_END_TAG_NAMES = new Set([
  "dd",
  "dt",
  "li",
  "option",
  "optgroup",
  "p",
  "rb",
  "rp",
  "rt",
  "rtc",
]);

/**
 * Inserts the element for an in-body open tag.
 *
 * Runs auto-closing and active-formatting reconstruction first, then creates
 * the element (`svg` and `math` get their namespace URI). Inside a table
 * context, an element that is not a valid child of its table-related parent
 * is foster-parented, except hidden inputs and forms. A form inside a table
 * context is inserted but never pushed onto the stack.
 */
const insertInBodyStartTag = (state: ParserState, token: Token): void => {
  const name = token.value.toLowerCase();

  handleAutoClosing(state, name);
  reconstructActiveFormattingElements(state);

  const parent = getCurrentParent(state);

  const namespaceURI: string | undefined =
    name === "svg"
      ? SVG_NAMESPACE
      : name === "math"
        ? MATHML_NAMESPACE
        : undefined;

  const node = createElement(name, token.attributes || {}, namespaceURI);

  const insideTable = isInTableContext(state);
  const parentName = parent.tagName || "";
  const allowedByParent = isValidChildForTableParent(parentName, name);
  const hiddenInput =
    name === "input" &&
    token.attributes &&
    token.attributes.type &&
    token.attributes.type.toLowerCase() === "hidden";
  const formInsideTable = name === "form" && insideTable;

  const mustFosterParent =
    insideTable &&
    TABLE_CONTEXT_ELEMENTS.has(parentName.toLowerCase()) &&
    !allowedByParent &&
    !hiddenInput &&
    !formInsideTable;

  if (mustFosterParent) {
    insertWithFosterParenting(state, node);
  } else {
    appendChild(parent, node);
  }

  if (token.isSelfClosing || VOID_ELEMENTS.has(name)) {
    return;
  }
  if (!formInsideTable) {
    state.stack.push(node);
  }
  if (FORMATTING_ELEMENTS.has(name)) {
    state.activeFormattingElements.push(node);
  }
};

/**
 * Processes an in-body close tag.
 *
 * Formatting elements are delegated to the adoption agency algorithm.
 * Otherwise, open elements with implied end tags are popped (each recorded as
 * an error) until one matches or a non-implied element is on top; the top
 * element is then popped if it matches, else an unmatched-tag error is added.
 */
const closeInBodyEndTag = (state: ParserState, token: Token): void => {
  const name = token.value.toLowerCase();

  if (FORMATTING_ELEMENTS.has(name)) {
    runAdoptionAgencyAlgorithm(state, name);
    return;
  }

  while (state.stack.length > 1) {
    const top = getCurrentElement(state);
    if (!top) {
      break;
    }
    const topName = top.tagName.toLowerCase();
    if (!IMPLIED_END_TAG_NAMES.has(topName) || topName === name) {
      break;
    }
    state.stack.pop();
    addError(
      state,
      `Implied end tag: ${top.tagName}`,
      token.position?.offset || 0,
    );
  }

  const top = getCurrentElement(state);
  if (top && top.tagName.toLowerCase() === name) {
    state.stack.pop();
    return;
  }
  addError(
    state,
    `Unmatched closing tag: ${name}`,
    token.position?.offset || 0,
  );
};

/**
 * Handles a token while in the `InBody` insertion mode, which is also the
 * fallback for any mode without its own handler. DOCTYPE tokens and unknown
 * token types are ignored here.
 */
const parseTokenInInBodyMode = (state: ParserState, token: Token): void => {
  switch (token.type) {
    case TokenType.TAG_OPEN:
      insertInBodyStartTag(state, token);
      break;
    case TokenType.TAG_CLOSE:
      closeInBodyEndTag(state, token);
      break;
    case TokenType.TEXT:
      parseText(state, token);
      break;
    case TokenType.COMMENT:
      parseComment(state, token);
      break;
    case TokenType.CDATA:
      parseCDATA(state, token);
      break;
    case TokenType.DOCTYPE:
      // A doctype inside the body is dropped.
      break;
    case TokenType.PROCESSING_INSTRUCTION:
      parseProcessingInstruction(state, token);
      break;
    default:
      break;
  }
};
438
+
439
/**
 * Closes a formatting element whose end tag may be mis-nested.
 *
 * - If no matching formatting element is found in the stack, nothing happens.
 * - If it is the current element, it is popped and dropped from the active
 *   formatting list.
 * - If there is no furthest block, the stack is popped down to (and including)
 *   the formatting element.
 * - Otherwise the elements between the formatting element and the furthest
 *   block are cloned, the furthest block is moved under the common ancestor
 *   (wrapped in those clones), its children are moved into a clone of the
 *   formatting element, and the stack is rebuilt as
 *   `[...below formatting element, ...clones, furthestBlock]`.
 *
 * @param tagName Lower-cased name of the end tag being processed.
 */
const runAdoptionAgencyAlgorithm = (
  state: ParserState,
  tagName: string,
): void => {
  const match = findFormattingElementInStack(state.stack, tagName);
  if (!match) {
    return;
  }
  const formattingElement = match.element;
  const formattingIndex = match.index;

  // Simple case: the formatting element is the innermost open element.
  if (getCurrentElement(state) === formattingElement) {
    state.stack.pop();
    removeFromActiveFormattingElements(state, formattingElement);
    return;
  }

  const furthest = findFurthestBlock(state.stack, formattingIndex);
  if (!furthest) {
    while (state.stack.length > formattingIndex) {
      state.stack.pop();
    }
    removeFromActiveFormattingElements(state, formattingElement);
    return;
  }
  const furthestBlock = furthest.element;
  const furthestBlockIndex = furthest.index;

  const commonAncestor = getCommonAncestor(state.stack, formattingIndex);
  if (!commonAncestor) {
    return;
  }

  // Walk from just below the furthest block up to just above the formatting
  // element, wrapping the accumulated subtree in a clone of each element.
  let lastNode = furthestBlock;
  const clones: any[] = [];
  for (let i = furthestBlockIndex - 1; i > formattingIndex; i--) {
    const original = state.stack[i];
    const clone = cloneFormattingElement(original);
    clones.unshift(clone);

    replaceInActiveFormattingElements(state, original, clone);

    const childAt = original.childNodes.indexOf(lastNode);
    if (childAt !== -1) {
      original.childNodes.splice(childAt, 1);
    }

    appendChild(clone, lastNode);
    lastNode = clone;
  }

  // Detach the furthest block if it is a direct child of the formatting element.
  const blockAt = formattingElement.childNodes.indexOf(furthestBlock);
  if (blockAt !== -1) {
    formattingElement.childNodes.splice(blockAt, 1);
    furthestBlock.parentNode = null;
  }

  appendChild(commonAncestor, lastNode);

  const replacement = cloneFormattingElement(formattingElement);
  reparentChildren(furthestBlock, replacement);
  appendChild(furthestBlock, replacement);

  removeFromActiveFormattingElements(state, formattingElement);

  // Drop the formatting element and everything above it, then re-open the
  // clones followed by the furthest block.
  state.stack.length = formattingIndex;
  state.stack.push(...clones, furthestBlock);
};
514
+
515
/**
 * Removes the first occurrence of `element` from the active formatting
 * elements list; does nothing when it is not present.
 */
const removeFromActiveFormattingElements = (
  state: ParserState,
  element: any,
): void => {
  const active = state.activeFormattingElements;
  const at = active.indexOf(element);
  if (at === -1) {
    return;
  }
  active.splice(at, 1);
};
524
+
525
/**
 * Swaps the first occurrence of `oldElement` in the active formatting
 * elements list for `newElement`, keeping its slot; does nothing when
 * `oldElement` is not present.
 */
const replaceInActiveFormattingElements = (
  state: ParserState,
  oldElement: any,
  newElement: any,
): void => {
  const active = state.activeFormattingElements;
  const at = active.indexOf(oldElement);
  if (at === -1) {
    return;
  }
  active[at] = newElement;
};
535
+
536
/**
 * Inserts a text node for a text token.
 *
 * Whitespace-only text is dropped when `shouldSkipWhitespace` says so for the
 * current parent. Otherwise active formatting elements are reconstructed and
 * the text is foster-parented if the (possibly new) current parent is a
 * table-context element, else appended to it.
 */
const parseText = (state: ParserState, token: Token): void => {
  const text = token.value;

  const parentBeforeReconstruct = getCurrentParent(state);
  if (text.trim() === "" && shouldSkipWhitespace(parentBeforeReconstruct)) {
    return;
  }

  reconstructActiveFormattingElements(state);

  const node = createTextNode(text);

  const insideTable = isInTableContext(state);
  // Re-read the parent: reconstruction may have pushed new elements.
  const parent = getCurrentParent(state);
  const parentIsTablePart =
    insideTable &&
    parent.tagName &&
    TABLE_CONTEXT_ELEMENTS.has(parent.tagName.toLowerCase());

  if (parentIsTablePart) {
    insertWithFosterParenting(state, node);
    return;
  }
  appendChild(parent, node);
};
560
+
561
/** Appends a comment node holding the token's value to the current parent. */
const parseComment = (state: ParserState, token: Token): void => {
  const parent = getCurrentParent(state);
  appendChild(parent, createComment(token.value));
};
567
+
568
/** Appends a CDATA node holding the token's value to the current parent. */
const parseCDATA = (state: ParserState, token: Token): void => {
  const parent = getCurrentParent(state);
  appendChild(parent, createCDATA(token.value));
};
573
+
574
/**
 * Appends a doctype node to the document root and records it as
 * `root.doctype`. An empty token value falls back to the name "html".
 */
const parseDoctype = (state: ParserState, token: Token): void => {
  const { root } = state;
  const node = createDoctype(token.value || "html");
  appendChild(root, node);
  root.doctype = node;
};
579
+
580
/**
 * Appends a processing-instruction node holding the token's value to the
 * current parent.
 */
const parseProcessingInstruction = (state: ParserState, token: Token): void => {
  const parent = getCurrentParent(state);
  appendChild(parent, createProcessingInstruction(token.value));
};
585
+
586
/**
 * Pops the current element when opening `tagName` implicitly closes it.
 *
 * `AUTO_CLOSE_RULES[tagName]` lists the (lower-case) element names that an
 * opening `tagName` closes. At most one element is popped per call.
 *
 * @param tagName Lower-cased name of the tag being opened.
 */
const handleAutoClosing = (state: ParserState, tagName: string): void => {
  const autoCloseList = AUTO_CLOSE_RULES[tagName];
  // Tag names come from parsed input. A name such as "constructor" can resolve
  // to an inherited, non-array property when the rules table is a plain
  // object, so require a real array before calling `.includes` on it.
  if (!Array.isArray(autoCloseList)) return;

  const currentElement = getCurrentElement(state);
  if (
    currentElement &&
    currentElement.tagName &&
    autoCloseList.includes(currentElement.tagName.toLowerCase())
  ) {
    state.stack.pop();
  }
};
599
+
600
/** Returns the top entry of the open-element stack (may be the document root). */
const getCurrentParent = (state: ParserState): any => {
  const { stack } = state;
  const topIndex = stack.length - 1;
  return stack[topIndex];
};
603
+
604
/**
 * Returns the topmost stack entry whose `nodeType` is 1 (an element), or
 * `null` when the stack holds no element.
 */
const getCurrentElement = (state: ParserState): any => {
  const { stack } = state;
  let depth = stack.length;
  while (depth > 0) {
    depth -= 1;
    const candidate = stack[depth];
    if (candidate.nodeType === 1) {
      return candidate;
    }
  }
  return null;
};
613
+
614
/** Returns the token at the parser's current position, or `null` if there is none. */
const getCurrentToken = (state: ParserState): Token | null => {
  const token = state.tokens[state.position];
  return token ? token : null;
};
617
+
618
/** Moves the parser position forward by one token. */
const advance = (state: ParserState): void => {
  state.position += 1;
};
621
+
622
/**
 * Records a parse error on the state.
 *
 * Only `message` and `position` carry information; `line` and `column` are
 * always stored as 0 and `severity` is always "error".
 */
const addError = (
  state: ParserState,
  message: string,
  position: number,
): void => {
  const entry = {
    message,
    position,
    line: 0,
    column: 0,
    severity: "error",
  };
  state.errors.push(entry);
};
635
+
636
/**
 * Parent tag names under which whitespace-only text is dropped.
 * Built once at module load instead of on every text token.
 */
const WHITESPACE_SKIPPING_PARENTS: ReadonlySet<string> = new Set([
  "html",
  "head",
  "body",
  "table",
  "tbody",
  "thead",
  "tfoot",
  "tr",
  "ul",
  "ol",
  "dl",
  "select",
  "optgroup",
]);

/**
 * Tells whether whitespace-only text should be dropped under `parent`.
 *
 * Returns false for a parent without a (truthy) `tagName`.
 *
 * NOTE(review): the lookup is case-sensitive and the set holds lower-case
 * names, while other code in this module lower-cases `tagName` before
 * comparing and `parse` compares it against "HTML". If elements carry
 * upper-case tag names this never matches — confirm whether that is intended
 * before changing it, since matching would start dropping whitespace text.
 */
const shouldSkipWhitespace = (parent: any): boolean => {
  return parent.tagName
    ? WHITESPACE_SKIPPING_PARENTS.has(parent.tagName)
    : false;
};
655
+
656
/**
 * Re-opens active formatting elements that are no longer on the stack.
 *
 * Nothing happens when the list is empty or its last entry is `null` or still
 * on the stack. Otherwise the function walks back to the entry just after the
 * nearest `null`/still-open entry (or to the start of the list). From there
 * to the end of the list, each non-null entry is cloned, appended to the
 * current parent, pushed onto the stack, and replaced in the list by its clone.
 */
const reconstructActiveFormattingElements = (state: ParserState): void => {
  const active = state.activeFormattingElements;

  const isMarkerOrOpen = (candidate: any): boolean =>
    candidate === null || isInStack(state.stack, candidate);

  if (active.length === 0) {
    return;
  }
  if (isMarkerOrOpen(active[active.length - 1])) {
    return;
  }

  // Rewind while the preceding entry also needs to be re-opened.
  let start = active.length - 1;
  while (start > 0 && !isMarkerOrOpen(active[start - 1])) {
    start -= 1;
  }

  for (let i = start; i < active.length; i++) {
    const stale = active[i];
    if (stale === null) {
      continue;
    }

    const reopened = cloneFormattingElement(stale);
    appendChild(getCurrentParent(state), reopened);
    state.stack.push(reopened);
    active[i] = reopened;
  }
};
690
+
691
/**
 * Tells whether `element` is present in `stack`.
 *
 * Uses `Array.prototype.includes` instead of a hand-written loop; for object
 * entries this is the same identity comparison.
 */
const isInStack = (stack: any[], element: any): boolean => {
  return stack.includes(element);
};
699
+
700
/**
 * Tells whether the innermost relevant open element is table-related.
 *
 * Scans the stack from the top: returns true at the first entry whose tag
 * name is in `TABLE_CONTEXT_ELEMENTS`, false at the first `html` entry, and
 * false if neither is found. Entries without a tag name are skipped.
 */
const isInTableContext = (state: ParserState): boolean => {
  const { stack } = state;
  for (let depth = stack.length - 1; depth >= 0; depth--) {
    const rawName = stack[depth].tagName;
    if (!rawName) {
      continue;
    }
    const name = rawName.toLowerCase();
    if (TABLE_CONTEXT_ELEMENTS.has(name)) {
      return true;
    }
    if (name === "html") {
      return false;
    }
  }
  return false;
};
712
+
713
/**
 * Tells whether `childTagName` may be inserted directly under a table-related
 * parent. Names are compared case-insensitively.
 *
 * `table`, `tbody`/`thead`/`tfoot`, and `tr` are each checked against their
 * own allow-list; every other parent accepts any child.
 */
const isValidChildForTableParent = (
  parentTagName: string,
  childTagName: string,
): boolean => {
  const child = childTagName.toLowerCase();

  switch (parentTagName.toLowerCase()) {
    case "table":
      return VALID_TABLE_CHILDREN.has(child);
    case "tbody":
    case "thead":
    case "tfoot":
      return VALID_TABLE_SECTION_CHILDREN.has(child);
    case "tr":
      return VALID_TR_CHILDREN.has(child);
    default:
      return true;
  }
};
731
+
732
/**
 * Locates where a foster-parented node should go.
 *
 * Finds the topmost `table` on the stack. If it has a parent node, the target
 * is that parent with the table as the insert-before reference. Otherwise the
 * target is the stack entry below the table (or the document root), with no
 * reference node. Returns `null` when no table is open.
 */
const findFosterParentTarget = (
  state: ParserState,
): { parent: any; before: any } | null => {
  const { stack } = state;
  for (let depth = stack.length - 1; depth >= 0; depth--) {
    const candidate = stack[depth];
    const isTable =
      candidate.tagName && candidate.tagName.toLowerCase() === "table";
    if (!isTable) {
      continue;
    }
    return candidate.parentNode
      ? { parent: candidate.parentNode, before: candidate }
      : { parent: stack[depth - 1] || state.root, before: null };
  }
  return null;
};
746
+
747
/**
 * Inserts `node` using foster parenting when the current parent is a
 * table-context element; otherwise appends it to the current parent.
 *
 * With a foster target, the node is spliced into the target parent's
 * `childNodes` immediately before the table when the table is found there;
 * in every other case it is appended to the target parent.
 *
 * NOTE(review): the splice path sets `parentNode` and edits `childNodes`
 * directly instead of going through `appendChild` — confirm that no other
 * bookkeeping performed by the DOM helpers is needed here.
 */
const insertWithFosterParenting = (state: ParserState, node: any): void => {
  const parent = getCurrentParent(state);
  const insideTable = isInTableContext(state);

  const parentIsTablePart =
    insideTable &&
    parent.tagName &&
    TABLE_CONTEXT_ELEMENTS.has(parent.tagName.toLowerCase());

  const target = parentIsTablePart ? findFosterParentTarget(state) : null;
  if (!target) {
    appendChild(parent, node);
    return;
  }

  if (target.before) {
    const siblings = target.parent.childNodes;
    const tableAt = siblings.indexOf(target.before);
    if (tableAt !== -1) {
      node.parentNode = target.parent;
      siblings.splice(tableAt, 0, node);
      return;
    }
  }

  appendChild(target.parent, node);
};