@tkeron/html-parser 1.1.2 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. package/.github/workflows/npm_deploy.yml +14 -4
  2. package/README.md +6 -6
  3. package/bun.lock +6 -8
  4. package/check-versions.ts +147 -0
  5. package/index.ts +4 -8
  6. package/package.json +5 -6
  7. package/src/dom-simulator/append-child.ts +130 -0
  8. package/src/dom-simulator/append.ts +18 -0
  9. package/src/dom-simulator/attributes.ts +23 -0
  10. package/src/dom-simulator/clone-node.ts +51 -0
  11. package/src/dom-simulator/convert-ast-node-to-dom.ts +37 -0
  12. package/src/dom-simulator/create-cdata.ts +18 -0
  13. package/src/dom-simulator/create-comment.ts +23 -0
  14. package/src/dom-simulator/create-doctype.ts +24 -0
  15. package/src/dom-simulator/create-document.ts +81 -0
  16. package/src/dom-simulator/create-element.ts +195 -0
  17. package/src/dom-simulator/create-processing-instruction.ts +19 -0
  18. package/src/dom-simulator/create-temp-parent.ts +9 -0
  19. package/src/dom-simulator/create-text-node.ts +23 -0
  20. package/src/dom-simulator/escape-text-content.ts +6 -0
  21. package/src/dom-simulator/find-special-elements.ts +14 -0
  22. package/src/dom-simulator/get-text-content.ts +18 -0
  23. package/src/dom-simulator/index.ts +36 -0
  24. package/src/dom-simulator/inner-outer-html.ts +182 -0
  25. package/src/dom-simulator/insert-after.ts +20 -0
  26. package/src/dom-simulator/insert-before.ts +108 -0
  27. package/src/dom-simulator/matches.ts +26 -0
  28. package/src/dom-simulator/node-types.ts +26 -0
  29. package/src/dom-simulator/prepend.ts +24 -0
  30. package/src/dom-simulator/remove-child.ts +68 -0
  31. package/src/dom-simulator/remove.ts +7 -0
  32. package/src/dom-simulator/replace-child.ts +152 -0
  33. package/src/dom-simulator/set-text-content.ts +33 -0
  34. package/src/dom-simulator/update-element-content.ts +56 -0
  35. package/src/dom-simulator.ts +12 -1126
  36. package/src/encoding/constants.ts +8 -0
  37. package/src/encoding/detect-encoding.ts +21 -0
  38. package/src/encoding/index.ts +1 -0
  39. package/src/encoding/normalize-encoding.ts +6 -0
  40. package/src/html-entities.ts +2127 -0
  41. package/src/index.ts +5 -5
  42. package/src/parser/adoption-agency-helpers.ts +145 -0
  43. package/src/parser/constants.ts +137 -0
  44. package/src/parser/dom-to-ast.ts +79 -0
  45. package/src/parser/foster-parenting-helpers.ts +48 -0
  46. package/src/parser/implicit-table-structure.ts +65 -0
  47. package/src/parser/index.ts +9 -0
  48. package/src/parser/parse.ts +924 -0
  49. package/src/parser/types.ts +56 -0
  50. package/src/selectors/find-elements-descendant.ts +47 -0
  51. package/src/selectors/index.ts +2 -0
  52. package/src/selectors/matches-selector.ts +12 -0
  53. package/src/selectors/matches-token.ts +27 -0
  54. package/src/selectors/parse-selector.ts +48 -0
  55. package/src/selectors/query-selector-all.ts +43 -0
  56. package/src/selectors/query-selector.ts +6 -0
  57. package/src/selectors/types.ts +10 -0
  58. package/src/serializer/attributes.ts +74 -0
  59. package/src/serializer/escape.ts +13 -0
  60. package/src/serializer/index.ts +1 -0
  61. package/src/serializer/serialize-tokens.ts +511 -0
  62. package/src/tokenizer/calculate-position.ts +10 -0
  63. package/src/tokenizer/constants.ts +11 -0
  64. package/src/tokenizer/decode-entities.ts +64 -0
  65. package/src/tokenizer/index.ts +2 -0
  66. package/src/tokenizer/parse-attributes.ts +74 -0
  67. package/src/tokenizer/tokenize.ts +165 -0
  68. package/src/tokenizer/types.ts +25 -0
  69. package/tests/adoption-agency-helpers.test.ts +304 -0
  70. package/tests/advanced.test.ts +242 -221
  71. package/tests/cloneNode.test.ts +19 -66
  72. package/tests/custom-elements-head.test.ts +54 -55
  73. package/tests/dom-extended.test.ts +77 -64
  74. package/tests/dom-manipulation.test.ts +51 -24
  75. package/tests/dom.test.ts +15 -13
  76. package/tests/encoding/detect-encoding.test.ts +33 -0
  77. package/tests/foster-parenting.test.ts +127 -0
  78. package/tests/google-dom.test.ts +2 -2
  79. package/tests/helpers/tokenizer-adapter.test.ts +29 -43
  80. package/tests/helpers/tokenizer-adapter.ts +36 -33
  81. package/tests/helpers/tree-adapter.test.ts +20 -20
  82. package/tests/helpers/tree-adapter.ts +34 -24
  83. package/tests/html-entities-text.test.ts +6 -2
  84. package/tests/innerhtml-void-elements.test.ts +52 -36
  85. package/tests/outerHTML-replacement.test.ts +37 -65
  86. package/tests/parser/dom-to-ast.test.ts +109 -0
  87. package/tests/parser/parse.test.ts +139 -0
  88. package/tests/parser.test.ts +281 -217
  89. package/tests/selectors/query-selector-all.test.ts +39 -0
  90. package/tests/selectors/query-selector.test.ts +42 -0
  91. package/tests/serializer/attributes.test.ts +132 -0
  92. package/tests/serializer/escape.test.ts +51 -0
  93. package/tests/serializer/serialize-tokens.test.ts +80 -0
  94. package/tests/serializer-core.test.ts +6 -6
  95. package/tests/serializer-injectmeta.test.ts +6 -6
  96. package/tests/serializer-optionaltags.test.ts +9 -6
  97. package/tests/serializer-options.test.ts +6 -6
  98. package/tests/serializer-whitespace.test.ts +6 -6
  99. package/tests/tokenizer/calculate-position.test.ts +34 -0
  100. package/tests/tokenizer/decode-entities.test.ts +31 -0
  101. package/tests/tokenizer/parse-attributes.test.ts +44 -0
  102. package/tests/tokenizer/tokenize.test.ts +757 -0
  103. package/tests/tokenizer-namedEntities.test.ts +10 -7
  104. package/tests/tokenizer-pendingSpecChanges.test.ts +10 -7
  105. package/tests/tokenizer.test.ts +268 -256
  106. package/tests/tree-construction-adoption01.test.ts +25 -16
  107. package/tests/tree-construction-adoption02.test.ts +30 -19
  108. package/tests/tree-construction-domjs-unsafe.test.ts +6 -4
  109. package/tests/tree-construction-entities02.test.ts +18 -16
  110. package/tests/tree-construction-html5test-com.test.ts +16 -10
  111. package/tests/tree-construction-math.test.ts +11 -9
  112. package/tests/tree-construction-namespace-sensitivity.test.ts +11 -9
  113. package/tests/tree-construction-noscript01.test.ts +11 -9
  114. package/tests/tree-construction-ruby.test.ts +6 -4
  115. package/tests/tree-construction-scriptdata01.test.ts +6 -4
  116. package/tests/tree-construction-svg.test.ts +6 -4
  117. package/tests/tree-construction-template.test.ts +6 -4
  118. package/tests/tree-construction-tests10.test.ts +6 -4
  119. package/tests/tree-construction-tests11.test.ts +6 -4
  120. package/tests/tree-construction-tests20.test.ts +7 -4
  121. package/tests/tree-construction-tests21.test.ts +7 -4
  122. package/tests/tree-construction-tests23.test.ts +7 -4
  123. package/tests/tree-construction-tests24.test.ts +7 -4
  124. package/tests/tree-construction-tests5.test.ts +6 -5
  125. package/tests/tree-construction-tests6.test.ts +6 -5
  126. package/tests/tree-construction-tests_innerHTML_1.test.ts +6 -5
  127. package/tests/void-elements.test.ts +85 -40
  128. package/tsconfig.json +1 -1
  129. package/src/css-selector.ts +0 -185
  130. package/src/encoding.ts +0 -39
  131. package/src/parser.ts +0 -682
  132. package/src/serializer.ts +0 -450
  133. package/src/tokenizer.ts +0 -325
  134. package/tests/selectors.test.ts +0 -128
@@ -0,0 +1,924 @@
1
+ import type { Token } from "../tokenizer/index";
2
+ import { TokenType } from "../tokenizer/index";
3
+ import {
4
+ createDocument,
5
+ createElement,
6
+ createTextNode,
7
+ createComment,
8
+ createCDATA,
9
+ createProcessingInstruction,
10
+ createDoctype,
11
+ appendChild,
12
+ } from "../dom-simulator/index.js";
13
+ import type { ParserState } from "./types";
14
+ import { InsertionMode } from "./types";
15
+ import {
16
+ VOID_ELEMENTS,
17
+ RAW_TEXT_ELEMENTS,
18
+ AUTO_CLOSE_RULES,
19
+ FORMATTING_ELEMENTS,
20
+ TABLE_CONTEXT_ELEMENTS,
21
+ VALID_TABLE_CHILDREN,
22
+ VALID_TABLE_SECTION_CHILDREN,
23
+ VALID_TR_CHILDREN,
24
+ } from "./constants";
25
+ import {
26
+ findFormattingElementInStack,
27
+ findFurthestBlock,
28
+ getCommonAncestor,
29
+ cloneFormattingElement,
30
+ reparentChildren,
31
+ } from "./adoption-agency-helpers.js";
32
+ import {
33
+ shouldCreateImplicitTableStructure,
34
+ createImplicitTableStructure,
35
+ CELL_ELEMENTS,
36
+ } from "./implicit-table-structure.js";
37
+ import { mergeAdjacentTextNodes } from "./foster-parenting-helpers.js";
38
+
39
/**
 * Builds a document tree from a token stream.
 *
 * Tokens are dispatched one by one to the handler for the current insertion
 * mode until the stream ends or an EOF token is reached. Afterwards the
 * function guarantees that the document has an `HTML` root element
 * (synthesising `html`/`head`/`body` when none was produced) and records an
 * "Unclosed tag" error for every entry still on the open-element stack above
 * the document itself.
 *
 * @param tokens Tokens produced by the tokenizer.
 * @returns The document node created for this parse.
 */
export const parse = (tokens: Token[]): any => {
  const state = createParserState(tokens);

  // `break` leaves the loop before the update clause runs, so the position
  // stays on the EOF token exactly as a manual while/advance loop would.
  for (; state.position < state.length; advance(state)) {
    const token = getCurrentToken(state);
    if (!token || token.type === TokenType.EOF) {
      break;
    }
    parseToken(state, token);
  }

  const existingHtml = state.root.childNodes.find(
    (child: any) => child.nodeType === 1 && child.tagName === "HTML",
  );

  if (existingHtml) {
    state.root.documentElement = existingHtml;
  } else {
    // No <html> element was produced: build the skeleton by hand.
    const html = createElement("html", {});
    const head = createElement("head", {});
    const body = createElement("body", {});
    appendChild(html, head);
    appendChild(html, body);

    const doctypes: any[] = [];
    const leadingComments: any[] = [];
    const bodyContent: any[] = [];

    // Doctypes and comments that precede any element stay at document level;
    // everything else is moved into the synthesised <body>.
    let seenElement = false;
    for (const child of [...state.root.childNodes]) {
      if (child.nodeType === 10) {
        doctypes.push(child);
        continue;
      }
      if (child.nodeType === 8 && !seenElement) {
        leadingComments.push(child);
        continue;
      }
      if (child.nodeType === 1) seenElement = true;
      bodyContent.push(child);
    }

    bodyContent.forEach((content) => {
      appendChild(body, content);
    });

    // Rebuild the document's child list: doctypes, leading comments, <html>.
    state.root.childNodes = [];
    for (const node of [...doctypes, ...leadingComments]) {
      node.parentNode = null;
      appendChild(state.root, node);
    }
    appendChild(state.root, html);
    state.root.documentElement = html;
    state.root.head = head;
    state.root.body = body;
  }

  // Everything still open (stack index 0 is the document) is reported.
  const reportOffset = getCurrentToken(state)?.position?.offset || 0;
  while (state.stack.length > 1) {
    const unclosedElement = state.stack.pop()!;
    addError(state, `Unclosed tag: ${unclosedElement.tagName}`, reportOffset);
  }

  return state.root;
};
116
+
117
/**
 * Creates the initial parser state for a token stream: a fresh document that
 * is also the only entry on the open-element stack, position 0, the `Initial`
 * insertion mode, and empty error / active-formatting lists.
 *
 * @param tokens Tokens to be consumed by the parser.
 * @returns A new state object referencing `tokens` (not a copy).
 */
const createParserState = (tokens: Token[]): ParserState => {
  const documentNode = createDocument();

  const initialState: ParserState = {
    tokens,
    position: 0,
    length: tokens.length,
    stack: [documentNode],
    root: documentNode,
    insertionMode: InsertionMode.Initial,
    errors: [],
    activeFormattingElements: [],
  };

  return initialState;
};
131
+
132
/**
 * Routes a token to the handler for the parser's current insertion mode.
 * Any mode without a dedicated handler is treated like `InBody`.
 *
 * @param state Parser state (its `insertionMode` selects the handler).
 * @param token Token to process.
 */
const parseToken = (state: ParserState, token: Token): void => {
  const mode = state.insertionMode;

  if (mode === InsertionMode.Initial) {
    parseTokenInInitialMode(state, token);
  } else if (mode === InsertionMode.BeforeHtml) {
    parseTokenInBeforeHtmlMode(state, token);
  } else if (mode === InsertionMode.BeforeHead) {
    parseTokenInBeforeHeadMode(state, token);
  } else if (mode === InsertionMode.InHead) {
    parseTokenInInHeadMode(state, token);
  } else if (mode === InsertionMode.AfterHead) {
    parseTokenInAfterHeadMode(state, token);
  } else {
    // InBody and every other mode share the body handler.
    parseTokenInInBodyMode(state, token);
  }
};
156
+
157
/**
 * Handles a token while in the `Initial` insertion mode.
 *
 * - DOCTYPE: inserted via `parseDoctype`, then the mode becomes `BeforeHtml`.
 * - Comment: inserted, mode unchanged.
 * - Whitespace-only text: ignored.
 * - Anything else: an `html` doctype node is appended to the document, the
 *   mode becomes `BeforeHtml`, and the same token is reprocessed.
 */
const parseTokenInInitialMode = (state: ParserState, token: Token): void => {
  if (token.type === TokenType.DOCTYPE) {
    parseDoctype(state, token);
    state.insertionMode = InsertionMode.BeforeHtml;
    return;
  }

  if (token.type === TokenType.COMMENT) {
    parseComment(state, token);
    return;
  }

  if (token.type === TokenType.TEXT && token.value.trim() === "") {
    return;
  }

  const implicitDoctype = createDoctype("html");
  appendChild(state.root, implicitDoctype);
  state.insertionMode = InsertionMode.BeforeHtml;
  parseToken(state, token);
};
171
+
172
/**
 * Handles a token while in the `BeforeHtml` insertion mode.
 *
 * An `<html>` start tag creates the root element with the tag's attributes.
 * Comments are inserted; DOCTYPE tokens and whitespace-only text are ignored.
 * Any other token causes an attribute-less `html` element to be created and
 * the token to be reprocessed in `BeforeHead` mode.
 */
const parseTokenInBeforeHtmlMode = (state: ParserState, token: Token): void => {
  const isHtmlStartTag =
    token.type === TokenType.TAG_OPEN && token.value.toLowerCase() === "html";

  if (!isHtmlStartTag) {
    if (token.type === TokenType.COMMENT) {
      parseComment(state, token);
      return;
    }
    if (token.type === TokenType.DOCTYPE) {
      return;
    }
    if (token.type === TokenType.TEXT && token.value.trim() === "") {
      return;
    }
  }

  // Explicit tag: keep its attributes. Implicit root: no attributes.
  const html = createElement("html", isHtmlStartTag ? token.attributes || {} : {});
  appendChild(state.root, html);
  state.root.documentElement = html;
  state.stack.push(html);
  state.insertionMode = InsertionMode.BeforeHead;

  if (!isHtmlStartTag) {
    // The triggering token has not been consumed yet.
    parseToken(state, token);
  }
};
195
+
196
/**
 * Handles a token while in the `BeforeHead` insertion mode.
 *
 * A `<head>` start tag creates the head element with the tag's attributes and
 * marks it as explicit (`state.explicitHead = true`). Comments are inserted
 * and whitespace-only text is ignored. Any other token creates an implicit
 * head (`state.explicitHead = false`) and is then reprocessed in `InHead`.
 */
const parseTokenInBeforeHeadMode = (state: ParserState, token: Token): void => {
  const isHeadStartTag =
    token.type === TokenType.TAG_OPEN && token.value.toLowerCase() === "head";

  if (!isHeadStartTag) {
    if (token.type === TokenType.COMMENT) {
      parseComment(state, token);
      return;
    }
    if (token.type === TokenType.TEXT && token.value.trim() === "") {
      return;
    }
  }

  const head = createElement("head", isHeadStartTag ? token.attributes || {} : {});
  appendChild(getCurrentParent(state), head);
  state.root.head = head;
  state.stack.push(head);
  state.insertionMode = InsertionMode.InHead;
  state.explicitHead = isHeadStartTag;

  if (!isHeadStartTag) {
    // The triggering token still has to be handled by the head rules.
    parseToken(state, token);
  }
};
220
+
221
/**
 * Creates an element for a start-tag token and appends it to the node on top
 * of the open-element stack. The element is pushed onto the stack unless the
 * token is self-closing or the tag is a void element.
 *
 * @param state Parser state.
 * @param token Start-tag token; its value is lower-cased to get the tag name.
 */
const parseOpenTag = (state: ParserState, token: Token): void => {
  const tagName = token.value.toLowerCase();
  const parent = getCurrentParent(state);

  const element = createElement(
    tagName,
    token.attributes || {},
    undefined,
    token.isSelfClosing,
  );
  appendChild(parent, element);

  const hasNoContent = token.isSelfClosing || VOID_ELEMENTS.has(tagName);
  if (hasNoContent) {
    return;
  }
  state.stack.push(element);
};
236
+
237
/**
 * Removes the `head` element, and anything still open inside it, from the
 * open-element stack.
 *
 * A single `pop()` is only correct when `head` is the top entry. When a child
 * such as `noscript` or a custom element is still open, one pop would leave
 * `head` on the stack and later content (including `<body>`) would be nested
 * inside it. If no `head` entry is found, one entry is popped as a fallback.
 */
const popThroughHead = (state: ParserState): void => {
  // Index 0 is the document node and is never removed here.
  for (let i = state.stack.length - 1; i > 0; i--) {
    if (state.stack[i]?.tagName?.toLowerCase() === "head") {
      state.stack.length = i;
      return;
    }
  }
  state.stack.pop();
};

/**
 * Handles a token while in the `InHead` insertion mode.
 *
 * - Inside a raw-text element, text is inserted and the matching end tag
 *   closes the element.
 * - `title`, `style`, `script`, `noscript`, `meta`, `link` and `base` start
 *   tags are inserted; a nested `<head>` start tag is ignored.
 * - Start tags whose name contains `-` are inserted only when the head was
 *   opened explicitly (`state.explicitHead`).
 * - `</head>`, any other start tag, and any other non-whitespace token leave
 *   the head: the stack is unwound through `head`, the mode becomes
 *   `AfterHead`, and (except for `</head>`) the token is reprocessed.
 * - Comments are inserted; whitespace-only text is ignored.
 */
const parseTokenInInHeadMode = (state: ParserState, token: Token): void => {
  const currentElement = getCurrentElement(state);
  const currentTagName = currentElement?.tagName?.toLowerCase();

  if (RAW_TEXT_ELEMENTS.has(currentTagName)) {
    if (token.type === TokenType.TEXT) {
      parseText(state, token);
      return;
    } else if (
      token.type === TokenType.TAG_CLOSE &&
      token.value.toLowerCase() === currentTagName
    ) {
      state.stack.pop();
      return;
    }
  }

  if (token.type === TokenType.TAG_OPEN) {
    const tagName = token.value.toLowerCase();
    if (
      tagName === "title" ||
      tagName === "style" ||
      tagName === "script" ||
      tagName === "noscript" ||
      tagName === "meta" ||
      tagName === "link" ||
      tagName === "base"
    ) {
      parseOpenTag(state, token);
    } else if (tagName === "head") {
      // A second <head> start tag is ignored.
    } else if (tagName.includes("-") && state.explicitHead) {
      parseOpenTag(state, token);
    } else {
      // Not head content: close the head and let the next mode handle it.
      popThroughHead(state);
      state.insertionMode = InsertionMode.AfterHead;
      parseToken(state, token);
    }
  } else if (token.type === TokenType.TAG_CLOSE) {
    const tagName = token.value.toLowerCase();
    if (tagName === "head") {
      popThroughHead(state);
      state.insertionMode = InsertionMode.AfterHead;
    } else if (
      tagName === "title" ||
      tagName === "style" ||
      tagName === "script" ||
      tagName === "noscript"
    ) {
      if (currentTagName === tagName) {
        state.stack.pop();
      }
    } else if (tagName.includes("-") && currentTagName === tagName) {
      state.stack.pop();
    }
  } else if (token.type === TokenType.COMMENT) {
    parseComment(state, token);
  } else if (token.type === TokenType.TEXT && token.value.trim() === "") {
    // Whitespace between head children is dropped.
  } else {
    popThroughHead(state);
    state.insertionMode = InsertionMode.AfterHead;
    parseToken(state, token);
  }
};
305
+
306
/**
 * Handles a token while in the `AfterHead` insertion mode.
 *
 * A `<body>` start tag creates the body element with the tag's attributes.
 * Comments are inserted and whitespace-only text is ignored. Any other token
 * creates an attribute-less body and is then reprocessed in `InBody` mode.
 */
const parseTokenInAfterHeadMode = (state: ParserState, token: Token): void => {
  const isBodyStartTag =
    token.type === TokenType.TAG_OPEN && token.value.toLowerCase() === "body";

  if (!isBodyStartTag) {
    if (token.type === TokenType.COMMENT) {
      parseComment(state, token);
      return;
    }
    if (token.type === TokenType.TEXT && token.value.trim() === "") {
      return;
    }
  }

  const body = createElement("body", isBodyStartTag ? token.attributes || {} : {});
  appendChild(getCurrentParent(state), body);
  state.root.body = body;
  state.stack.push(body);
  state.insertionMode = InsertionMode.InBody;

  if (!isBodyStartTag) {
    // The triggering token is body content and still has to be inserted.
    parseToken(state, token);
  }
};
328
+
329
/** Namespace URI passed to `createElement` for `<svg>` start tags. */
const SVG_NAMESPACE = "http://www.w3.org/2000/svg";
/** Namespace URI passed to `createElement` for `<math>` start tags. */
const MATHML_NAMESPACE = "http://www.w3.org/1998/Math/MathML";

/**
 * Inserts the element for a start tag seen in body mode.
 *
 * Steps, in order: apply the auto-close rules, unwind to the nearest table
 * context for table-structure tags (or reconstruct active formatting elements
 * when the stack top is not a table-context element), create the element, and
 * then insert it by one of three routes: implicit table structure, foster
 * parenting, or a plain append to the current parent.
 */
const insertStartTagInBody = (state: ParserState, token: Token): void => {
  const tagName = token.value.toLowerCase();

  handleAutoClosing(state, tagName);

  const inTableContext = isInTableContext(state);
  const isTableStructureElement =
    CELL_ELEMENTS.has(tagName) ||
    ["tr", "tbody", "thead", "tfoot"].includes(tagName);
  const stackTopTag = getCurrentParent(state).tagName?.toLowerCase() || "";
  const parentIsTableContext = TABLE_CONTEXT_ELEMENTS.has(stackTopTag);

  if (inTableContext && isTableStructureElement) {
    if (findTableContextParent(state)) {
      popStackUntilTableContext(state);
    }
  } else if (!parentIsTableContext) {
    reconstructActiveFormattingElements(state);
  }

  // Read the parent only now: the steps above may have changed the stack.
  const currentParent = getCurrentParent(state);

  const namespaceURI: string | undefined =
    tagName === "svg"
      ? SVG_NAMESPACE
      : tagName === "math"
        ? MATHML_NAMESPACE
        : undefined;

  const element = createElement(tagName, token.attributes || {}, namespaceURI);

  const parentTagName = currentParent.tagName || "";

  const isValidForParent = isValidChildForTableParent(parentTagName, tagName);
  const isHiddenInput =
    tagName === "input" &&
    token.attributes?.type?.toLowerCase() === "hidden";
  const isFormInTable = tagName === "form" && inTableContext;

  const needsImplicitStructure =
    inTableContext &&
    shouldCreateImplicitTableStructure(parentTagName, tagName);

  const needsFosterParenting =
    inTableContext &&
    TABLE_CONTEXT_ELEMENTS.has(parentTagName.toLowerCase()) &&
    !(
      isValidForParent ||
      isHiddenInput ||
      isFormInTable ||
      needsImplicitStructure
    );

  if (needsImplicitStructure) {
    createImplicitTableStructure(state.stack, parentTagName, tagName);
    // The helper receives the stack, so the insertion parent is looked up again.
    appendChild(getCurrentParent(state), element);
  } else if (needsFosterParenting) {
    insertWithFosterParenting(state, element);
  } else {
    appendChild(currentParent, element);
  }

  if (token.isSelfClosing || VOID_ELEMENTS.has(tagName)) {
    return;
  }

  const isFormattingElement = FORMATTING_ELEMENTS.has(tagName);

  // A form inside a table, and a foster-parented formatting element, are
  // inserted but never become the current node.
  const keepOffStack =
    isFormInTable || (needsFosterParenting && isFormattingElement);
  if (!keepOffStack) {
    state.stack.push(element);
  }

  if (isFormattingElement) {
    state.activeFormattingElements.push(element);
  }
};

/**
 * Processes an end tag seen in body mode.
 *
 * Formatting elements are delegated to the adoption agency algorithm. For any
 * other tag, open elements with implied end tags are popped (each recorded as
 * an error) until the current element matches the tag or has no implied end
 * tag; then the current element is popped if it matches, otherwise an
 * "Unmatched closing tag" error is recorded and the stack is left alone.
 */
const processEndTagInBody = (state: ParserState, token: Token): void => {
  const tagName = token.value.toLowerCase();

  if (FORMATTING_ELEMENTS.has(tagName)) {
    runAdoptionAgencyAlgorithm(state, tagName);
    return;
  }

  const impliedEndTags = [
    "dd",
    "dt",
    "li",
    "option",
    "optgroup",
    "p",
    "rb",
    "rp",
    "rt",
    "rtc",
  ];

  while (state.stack.length > 1) {
    const openElement = getCurrentElement(state);
    if (!openElement) {
      break;
    }
    const openTag = openElement.tagName.toLowerCase();
    if (openTag === tagName || !impliedEndTags.includes(openTag)) {
      break;
    }
    state.stack.pop();
    addError(
      state,
      `Implied end tag: ${openElement.tagName}`,
      token.position?.offset || 0,
    );
  }

  const topElement = getCurrentElement(state);
  if (topElement && topElement.tagName.toLowerCase() === tagName) {
    state.stack.pop();
    return;
  }

  addError(
    state,
    `Unmatched closing tag: ${tagName}`,
    token.position?.offset || 0,
  );
};

/**
 * Handles a token while in the `InBody` insertion mode (also used for every
 * mode without a dedicated handler). Start and end tags go to the helpers
 * above; text, comment, CDATA and processing-instruction tokens are inserted
 * by their dedicated functions; DOCTYPE tokens are ignored.
 */
const parseTokenInInBodyMode = (state: ParserState, token: Token): void => {
  if (token.type === TokenType.TAG_OPEN) {
    insertStartTagInBody(state, token);
  } else if (token.type === TokenType.TAG_CLOSE) {
    processEndTagInBody(state, token);
  } else if (token.type === TokenType.TEXT) {
    parseText(state, token);
  } else if (token.type === TokenType.COMMENT) {
    parseComment(state, token);
  } else if (token.type === TokenType.CDATA) {
    parseCDATA(state, token);
  } else if (token.type === TokenType.DOCTYPE) {
    // A DOCTYPE inside the body is dropped.
  } else if (token.type === TokenType.PROCESSING_INSTRUCTION) {
    parseProcessingInstruction(state, token);
  }
};
477
+
478
/**
 * Closes a formatting element whose end tag may be mis-nested.
 *
 * 1. Nothing happens when no matching formatting element is on the stack.
 * 2. If it is the current element it is simply popped and removed from the
 *    active formatting list.
 * 3. If no furthest block is found above it, the stack is truncated to the
 *    formatting element's index and the element is removed from the list.
 * 4. Otherwise the stack entries between the formatting element and the
 *    furthest block are cloned and re-nested, the resulting chain is appended
 *    to the common ancestor, the furthest block's children are moved into a
 *    fresh clone of the formatting element, and the stack is rebuilt as
 *    `[..., clones, furthestBlock]`.
 *
 * @param state   Parser state (stack and active formatting list are mutated).
 * @param tagName Lower-cased tag name from the end tag.
 */
const runAdoptionAgencyAlgorithm = (
  state: ParserState,
  tagName: string,
): void => {
  const located = findFormattingElementInStack(state.stack, tagName);
  if (!located) {
    return;
  }

  const formattingElement = located.element;
  const formattingElementIndex = located.index;

  if (getCurrentElement(state) === formattingElement) {
    state.stack.pop();
    removeFromActiveFormattingElements(state, formattingElement);
    return;
  }

  const furthest = findFurthestBlock(state.stack, formattingElementIndex);

  if (!furthest) {
    // Drop the formatting element and everything opened after it.
    if (state.stack.length > formattingElementIndex) {
      state.stack.length = formattingElementIndex;
    }
    removeFromActiveFormattingElements(state, formattingElement);
    return;
  }

  const furthestBlock = furthest.element;
  const furthestBlockIndex = furthest.index;

  const commonAncestor = getCommonAncestor(state.stack, formattingElementIndex);
  if (!commonAncestor) {
    return;
  }

  // Walk down from just below the furthest block to just above the formatting
  // element, wrapping the chain built so far in a clone of each entry.
  let lastNode = furthestBlock;
  const clonedNodes: any[] = [];

  for (let i = furthestBlockIndex - 1; i > formattingElementIndex; i--) {
    const node = state.stack[i];
    const nodeClone = cloneFormattingElement(node);
    clonedNodes.unshift(nodeClone);

    replaceInActiveFormattingElements(state, node, nodeClone);

    const childPosition = node.childNodes.indexOf(lastNode);
    if (childPosition !== -1) {
      node.childNodes.splice(childPosition, 1);
    }

    appendChild(nodeClone, lastNode);
    lastNode = nodeClone;
  }

  const blockPosition = formattingElement.childNodes.indexOf(furthestBlock);
  if (blockPosition !== -1) {
    formattingElement.childNodes.splice(blockPosition, 1);
    furthestBlock.parentNode = null;
  }

  appendChild(commonAncestor, lastNode);

  const newFormattingElement = cloneFormattingElement(formattingElement);
  reparentChildren(furthestBlock, newFormattingElement);
  appendChild(furthestBlock, newFormattingElement);

  removeFromActiveFormattingElements(state, formattingElement);

  state.stack.length = formattingElementIndex;
  state.stack.push(...clonedNodes, furthestBlock);
};
553
+
554
/**
 * Removes the first occurrence of `element` from the list of active
 * formatting elements. Does nothing when the element is not in the list.
 */
const removeFromActiveFormattingElements = (
  state: ParserState,
  element: any,
): void => {
  const activeList = state.activeFormattingElements;
  const at = activeList.indexOf(element);
  if (at === -1) {
    return;
  }
  activeList.splice(at, 1);
};
563
+
564
/**
 * Replaces the first occurrence of `oldElement` in the list of active
 * formatting elements with `newElement`, keeping its position. Does nothing
 * when `oldElement` is not in the list.
 */
const replaceInActiveFormattingElements = (
  state: ParserState,
  oldElement: any,
  newElement: any,
): void => {
  const activeList = state.activeFormattingElements;
  const at = activeList.indexOf(oldElement);
  if (at === -1) {
    return;
  }
  activeList[at] = newElement;
};
574
+
575
/**
 * Inserts a text node for a text token.
 *
 * Whitespace-only text is dropped when `shouldSkipWhitespace` says so for the
 * current parent. When the current parent is a table-context element the text
 * is foster-parented; otherwise the active formatting elements are
 * reconstructed first and the text is appended to the resulting current
 * parent.
 */
const parseText = (state: ParserState, token: Token): void => {
  const content = token.value;
  const parent = getCurrentParent(state);

  if (content.trim() === "" && shouldSkipWhitespace(parent)) {
    return;
  }

  const textNode = createTextNode(content);

  const parentIsTableContext =
    isInTableContext(state) &&
    parent.tagName &&
    TABLE_CONTEXT_ELEMENTS.has(parent.tagName.toLowerCase());

  if (parentIsTableContext) {
    insertWithFosterParentingAndReconstruct(state, textNode);
    return;
  }

  reconstructActiveFormattingElements(state);
  // Reconstruction may push clones, so the parent is looked up again.
  appendChild(getCurrentParent(state), textNode);
};
598
+
599
/** Appends a comment node holding the token's value to the current parent. */
const parseComment = (state: ParserState, token: Token): void => {
  appendChild(getCurrentParent(state), createComment(token.value));
};
605
+
606
/** Appends a CDATA node holding the token's value to the current parent. */
const parseCDATA = (state: ParserState, token: Token): void => {
  appendChild(getCurrentParent(state), createCDATA(token.value));
};
611
+
612
/**
 * Appends a doctype node to the document and records it as `root.doctype`.
 * The token's value is used as the doctype name, falling back to `"html"`
 * when the value is empty.
 */
const parseDoctype = (state: ParserState, token: Token): void => {
  const doctypeName = token.value || "html";
  const doctypeNode = createDoctype(doctypeName);
  appendChild(state.root, doctypeNode);
  state.root.doctype = doctypeNode;
};
617
+
618
/**
 * Appends a processing-instruction node built from the token's value to the
 * current parent.
 */
const parseProcessingInstruction = (state: ParserState, token: Token): void => {
  appendChild(
    getCurrentParent(state),
    createProcessingInstruction(token.value),
  );
};
623
+
624
/**
 * Applies the auto-close rules for a start tag: when the current element's
 * tag is listed in `AUTO_CLOSE_RULES[tagName]`, that element is popped from
 * the open-element stack. At most one element is popped.
 *
 * @param state   Parser state.
 * @param tagName Lower-cased name of the start tag about to be inserted.
 */
const handleAutoClosing = (state: ParserState, tagName: string): void => {
  // Tag names come from the input document, so only the table's own keys may
  // match. A plain index lookup would also resolve inherited members (for
  // example the tag name "constructor"), and calling `.includes` on such a
  // value throws.
  if (!Object.prototype.hasOwnProperty.call(AUTO_CLOSE_RULES, tagName)) {
    return;
  }

  const autoCloseList = AUTO_CLOSE_RULES[tagName];
  if (!autoCloseList) return;

  const currentElement = getCurrentElement(state);
  if (
    currentElement &&
    currentElement.tagName &&
    autoCloseList.includes(currentElement.tagName.toLowerCase())
  ) {
    state.stack.pop();
  }
};
637
+
638
/**
 * Returns the node on top of the open-element stack (the insertion parent),
 * or `undefined` when the stack is empty.
 */
const getCurrentParent = (state: ParserState): any => {
  const { stack } = state;
  return stack[stack.length - 1];
};
641
+
642
/**
 * Returns the topmost stack entry whose `nodeType` is 1 (an element), or
 * `null` when the stack holds no element.
 */
const getCurrentElement = (state: ParserState): any => {
  let i = state.stack.length;
  while (i-- > 0) {
    const candidate = state.stack[i];
    if (candidate.nodeType === 1) {
      return candidate;
    }
  }
  return null;
};
651
+
652
/**
 * Returns the token at the parser's current position, or `null` when the
 * position is past the end of the token list.
 */
const getCurrentToken = (state: ParserState): Token | null => {
  const current = state.tokens[state.position];
  return current ? current : null;
};
655
+
656
/** Moves the parser's position forward by one token. */
const advance = (state: ParserState): void => {
  state.position += 1;
};
659
+
660
/**
 * Records a parse error on the state.
 *
 * @param state    Parser state whose `errors` list receives the entry.
 * @param message  Human-readable description.
 * @param position Offset stored as the error's `position`.
 *
 * `line` and `column` are always stored as 0 and `severity` as `"error"`.
 */
const addError = (
  state: ParserState,
  message: string,
  position: number,
): void => {
  const entry = {
    message,
    position,
    line: 0,
    column: 0,
    severity: "error",
  };
  state.errors.push(entry);
};
673
+
674
/**
 * Tag names for which whitespace-only text is skipped. Built once at module
 * load instead of on every call.
 */
const WHITESPACE_SKIPPING_PARENTS: ReadonlySet<string> = new Set([
  "html",
  "head",
  "body",
  "table",
  "tbody",
  "thead",
  "tfoot",
  "tr",
  "ul",
  "ol",
  "dl",
  "select",
  "optgroup",
]);

/**
 * Reports whether whitespace-only text should be dropped for `parent`.
 *
 * The parent's `tagName` is looked up exactly as stored (case-sensitive).
 * Nodes without a `tagName` never skip whitespace.
 *
 * NOTE(review): the entries are lower-case, while other code in this file
 * lower-cases `tagName` before comparing and `parse` compares against
 * `"HTML"`. If element tag names are stored upper-case this lookup never
 * matches. Confirm whether that is intended before normalising the case,
 * since doing so would change which whitespace nodes end up in the tree.
 *
 * @param parent Node that would receive the text.
 * @returns `true` when the parent's `tagName` is in the skip list.
 */
const shouldSkipWhitespace = (parent: any): boolean => {
  return parent.tagName
    ? WHITESPACE_SKIPPING_PARENTS.has(parent.tagName)
    : false;
};
693
+
694
/**
 * Re-opens formatting elements that are in the active formatting list but no
 * longer on the open-element stack.
 *
 * Starting from the end of the list, the function walks back to the entry
 * just after the nearest marker (`null`) or entry that is still on the stack.
 * From there to the end of the list, every entry is cloned, the clone is
 * appended to the current parent, pushed onto the stack, and written back
 * into the list in place of the original.
 *
 * Nothing happens when the list is empty or its last entry is a marker or is
 * already on the stack.
 */
const reconstructActiveFormattingElements = (state: ParserState): void => {
  const list = state.activeFormattingElements;

  // An entry stops the backwards walk when it is a marker or still open.
  const stopsWalk = (entry: any): boolean =>
    entry === null || isInStack(state.stack, entry);

  if (list.length === 0 || stopsWalk(list[list.length - 1])) {
    return;
  }

  let start = list.length - 1;
  while (start > 0 && !stopsWalk(list[start - 1])) {
    start--;
  }

  for (let i = start; i < list.length; i++) {
    const entry = list[i];
    if (entry === null) {
      continue;
    }

    const reopened = cloneFormattingElement(entry);
    appendChild(getCurrentParent(state), reopened);
    state.stack.push(reopened);
    list[i] = reopened;
  }
};
728
+
729
/**
 * Reports whether `element` is present in `stack`, compared by strict
 * equality.
 */
const isInStack = (stack: any[], element: any): boolean => {
  return stack.lastIndexOf(element) !== -1;
};
737
+
738
/**
 * Reports whether the open-element stack has a table-context element above
 * the nearest `html` element. The stack is scanned from the top; the scan
 * answers `true` at the first table-context element and `false` as soon as
 * `html` is reached or the stack is exhausted.
 */
const isInTableContext = (state: ParserState): boolean => {
  for (let i = state.stack.length; i-- > 0; ) {
    const { tagName } = state.stack[i];
    if (!tagName) {
      continue;
    }
    const lowered = tagName.toLowerCase();
    if (TABLE_CONTEXT_ELEMENTS.has(lowered)) {
      return true;
    }
    if (lowered === "html") {
      return false;
    }
  }
  return false;
};
750
+
751
/**
 * Returns the topmost stack entry whose tag is a table-context element, or
 * `null` when the stack contains none.
 */
const findTableContextParent = (state: ParserState): any | null => {
  let i = state.stack.length;
  while (i-- > 0) {
    const candidate = state.stack[i];
    const isTableContext =
      candidate.tagName &&
      TABLE_CONTEXT_ELEMENTS.has(candidate.tagName.toLowerCase());
    if (isTableContext) {
      return candidate;
    }
  }
  return null;
};
760
+
761
/**
 * Pops the open-element stack until the current element is a table-context
 * element (never popping the bottom entry), then pushes a `null` marker onto
 * the list of active formatting elements.
 */
const popStackUntilTableContext = (state: ParserState): void => {
  while (state.stack.length > 1) {
    const currentTag = getCurrentElement(state)?.tagName;
    if (currentTag && TABLE_CONTEXT_ELEMENTS.has(currentTag.toLowerCase())) {
      break;
    }
    state.stack.pop();
  }

  // The marker is pushed on every call, whether or not anything was popped.
  state.activeFormattingElements.push(null);
};
775
+
776
/**
 * Reports whether `childTagName` is an allowed direct child of
 * `parentTagName` according to the table child sets. Both names are compared
 * case-insensitively. Parents other than `table`, `tbody`, `thead`, `tfoot`
 * and `tr` accept any child.
 */
const isValidChildForTableParent = (
  parentTagName: string,
  childTagName: string,
): boolean => {
  const parent = parentTagName.toLowerCase();
  const child = childTagName.toLowerCase();

  switch (parent) {
    case "table":
      return VALID_TABLE_CHILDREN.has(child);
    case "tbody":
    case "thead":
    case "tfoot":
      return VALID_TABLE_SECTION_CHILDREN.has(child);
    case "tr":
      return VALID_TR_CHILDREN.has(child);
    default:
      return true;
  }
};
794
+
795
/**
 * Locates where foster-parented content should be inserted.
 *
 * The stack is scanned from the top for a `table` element. If that table has
 * a parent node, the result is that parent with the table as the `before`
 * reference. Otherwise the result is the stack entry below the table (or the
 * document when there is none) with `before` set to `null`.
 *
 * @returns The insertion target, or `null` when no table is on the stack.
 */
const findFosterParentTarget = (
  state: ParserState,
): { parent: any; before: any } | null => {
  for (let i = state.stack.length; i-- > 0; ) {
    const candidate = state.stack[i];
    if (!candidate.tagName || candidate.tagName.toLowerCase() !== "table") {
      continue;
    }
    return candidate.parentNode
      ? { parent: candidate.parentNode, before: candidate }
      : { parent: state.stack[i - 1] || state.root, before: null };
  }
  return null;
};
809
+
810
/**
 * Inserts `node`, foster-parenting it when the current parent is a
 * table-context element.
 *
 * With a foster target, the node is spliced in directly before the table when
 * the table is found among the target parent's children; otherwise it is
 * appended to the target parent. Text nodes (`nodeType === 3`) are merged with
 * adjacent text after insertion. Without a foster target the node is simply
 * appended to the current parent.
 */
const insertWithFosterParenting = (state: ParserState, node: any): void => {
  const currentParent = getCurrentParent(state);

  const parentIsTableContext =
    isInTableContext(state) &&
    currentParent.tagName &&
    TABLE_CONTEXT_ELEMENTS.has(currentParent.tagName.toLowerCase());

  const target = parentIsTableContext ? findFosterParentTarget(state) : null;
  if (!target) {
    appendChild(currentParent, node);
    return;
  }

  const siblings = target.parent.childNodes;
  const tableIdx = target.before ? siblings.indexOf(target.before) : -1;

  if (tableIdx !== -1) {
    // Manual splice: the node takes the table's slot and the table shifts right.
    node.parentNode = target.parent;
    siblings.splice(tableIdx, 0, node);
    if (node.nodeType === 3) {
      mergeAdjacentTextNodes(target.parent, tableIdx);
    }
    return;
  }

  appendChild(target.parent, node);
  if (node.nodeType === 3) {
    const insertedIdx = target.parent.childNodes.indexOf(node);
    if (insertedIdx !== -1) {
      mergeAdjacentTextNodes(target.parent, insertedIdx);
    }
  }
};
845
+
846
/**
 * Foster-parents `node` while honouring active formatting elements that are
 * no longer on the open-element stack.
 *
 * - No table on the stack: the node is appended to the current parent.
 * - No pending formatting elements: the node is inserted before the table
 *   (or appended to the foster parent) and text nodes are merged with
 *   adjacent text.
 * - No marker in the active list and the last pending formatting element
 *   already sits in the foster parent before the table: the node is appended
 *   to that element.
 * - Otherwise the node is wrapped in clones of the pending formatting
 *   elements (last one innermost) and the outermost clone is inserted before
 *   the table, or appended to the foster parent.
 */
const insertWithFosterParentingAndReconstruct = (
  state: ParserState,
  node: any,
): void => {
  const target = findFosterParentTarget(state);
  if (!target) {
    appendChild(getCurrentParent(state), node);
    return;
  }

  // Splices `candidate` in directly before the table when the table is found
  // among the foster parent's children. Returns the insertion index, or -1
  // when nothing was inserted.
  const spliceBeforeTable = (candidate: any): number => {
    if (!target.before) {
      return -1;
    }
    const idx = target.parent.childNodes.indexOf(target.before);
    if (idx !== -1) {
      candidate.parentNode = target.parent;
      target.parent.childNodes.splice(idx, 0, candidate);
    }
    return idx;
  };

  const activeElements = getActiveFormattingElementsBeforeMarker(state);

  if (activeElements.length === 0) {
    const idx = spliceBeforeTable(node);
    if (idx !== -1) {
      if (node.nodeType === 3) {
        mergeAdjacentTextNodes(target.parent, idx);
      }
      return;
    }

    appendChild(target.parent, node);
    if (node.nodeType === 3) {
      const insertedIdx = target.parent.childNodes.indexOf(node);
      if (insertedIdx !== -1) {
        mergeAdjacentTextNodes(target.parent, insertedIdx);
      }
    }
    return;
  }

  const hasMarker = state.activeFormattingElements.includes(null);
  const lastFormatEl = activeElements[activeElements.length - 1];

  const canReuseLastFormatEl =
    !hasMarker &&
    lastFormatEl.parentNode === target.parent &&
    target.parent.childNodes.indexOf(lastFormatEl) <
      target.parent.childNodes.indexOf(target.before);

  if (canReuseLastFormatEl) {
    appendChild(lastFormatEl, node);
    return;
  }

  // Wrap from the inside out: the last pending element ends up innermost.
  const wrapped = activeElements.reduceRight((inner: any, formatEl: any) => {
    const clone = cloneFormattingElement(formatEl);
    appendChild(clone, inner);
    return clone;
  }, node);

  if (spliceBeforeTable(wrapped) === -1) {
    appendChild(target.parent, wrapped);
  }
};
911
+
912
/**
 * Returns, in list order, every active formatting element that is not
 * currently on the open-element stack.
 *
 * NOTE(review): despite the name, markers (`null`) are skipped rather than
 * ending the scan, so entries on both sides of a marker are returned. Confirm
 * this is intended.
 */
const getActiveFormattingElementsBeforeMarker = (state: ParserState): any[] => {
  return state.activeFormattingElements.filter(
    (entry: any) => entry !== null && !isInStack(state.stack, entry),
  );
};