@tkeron/html-parser 1.3.0 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/parser/constants.ts +12 -0
- package/src/parser/foster-parenting-helpers.ts +48 -0
- package/src/parser/implicit-table-structure.ts +65 -0
- package/src/parser/parse.ts +416 -74
- package/tests/adoption-multiple-iterations.test.ts +118 -0
- package/tests/foster-parenting.test.ts +127 -0
- package/tests/helpers/tree-adapter.ts +3 -3
- package/tests/implicit-close-formatting.test.ts +113 -0
- package/tests/tree-construction-adoption01.test.ts +3 -1
- package/tests/tree-construction-adoption02.test.ts +1 -1
package/package.json
CHANGED
package/src/parser/constants.ts
CHANGED
|
@@ -135,3 +135,15 @@ export const VALID_TR_CHILDREN = new Set([
|
|
|
135
135
|
"template",
|
|
136
136
|
"style",
|
|
137
137
|
]);
|
|
138
|
+
|
|
139
|
+
/**
 * Lower-case tag names that end a "button scope" search.
 *
 * Code that walks the stack of open elements from the innermost element
 * outwards (for example when looking for an open `<p>` to close) stops as
 * soon as it meets one of these tags.
 *
 * `button` itself is part of the set: an open `<button>` hides any `<p>`
 * above it, so `<p><button></p>` must not close the outer paragraph.
 *
 * NOTE(review): the HTML standard also lists MathML/SVG boundary elements
 * (`mi`, `foreignObject`, `title`, ...). They are left out because matching
 * here is by tag name only and several of those names collide with HTML
 * elements - confirm before adding them.
 */
export const BUTTON_SCOPE_TERMINATORS = new Set([
  "applet",
  "button",
  "caption",
  "html",
  "table",
  "td",
  "th",
  "marquee",
  "object",
  "template",
]);
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
 * Merges the text node at `insertIndex` with the text nodes directly before
 * and after it, so that `parent.childNodes` never holds two text nodes side
 * by side around that position.
 *
 * When a previous text sibling exists the node at `insertIndex` is folded
 * into it (and removed); the surviving node then also absorbs a following
 * text sibling. Both `textContent` and `nodeValue` of the surviving node are
 * updated, and every removed node gets `parentNode = null`.
 *
 * Does nothing when `parent` has fewer than two children or the node at
 * `insertIndex` is missing or is not a text node (`nodeType !== 3`).
 *
 * NOTE(review): only `childNodes` and `parentNode` are maintained here; if
 * nodes carry sibling links elsewhere they are not touched - confirm.
 *
 * @param parent - Node whose `childNodes` array is edited in place.
 * @param insertIndex - Index of the freshly inserted node inside `parent.childNodes`.
 */
export const mergeAdjacentTextNodes = (
  parent: any,
  insertIndex: number,
): void => {
  if (!parent.childNodes || parent.childNodes.length < 2) {
    return;
  }

  const TEXT_NODE = 3;
  const siblings = parent.childNodes;

  let survivor = siblings[insertIndex];
  if (!survivor || survivor.nodeType !== TEXT_NODE) {
    return;
  }
  let survivorIndex = insertIndex;

  // Fold the inserted node into a text node sitting right before it.
  const previous = insertIndex > 0 ? siblings[insertIndex - 1] : undefined;
  if (previous && previous.nodeType === TEXT_NODE) {
    previous.textContent =
      (previous.textContent || "") + (survivor.textContent || "");
    previous.nodeValue = previous.textContent;
    siblings.splice(insertIndex, 1);
    survivor.parentNode = null;
    survivor = previous;
    survivorIndex = insertIndex - 1;
  }

  // Whatever survived may now touch a following text node: absorb it too.
  const following = siblings[survivorIndex + 1];
  if (following && following.nodeType === TEXT_NODE) {
    survivor.textContent =
      (survivor.textContent || "") + (following.textContent || "");
    survivor.nodeValue = survivor.textContent;
    siblings.splice(survivorIndex + 1, 1);
    following.parentNode = null;
  }
};
|
|
35
|
+
|
|
36
|
+
/**
 * Inserts `node` into `parent.childNodes` immediately before `tableElement`
 * and points `node.parentNode` at `parent`.
 *
 * @param parent - Node whose `childNodes` array receives `node`.
 * @param tableElement - Existing child of `parent` that `node` is placed in front of.
 * @param node - Node to insert.
 * @returns The index `node` was inserted at, or `-1` (nothing changed) when
 *   `parent` has no `childNodes` or `tableElement` is not one of its children.
 */
export const insertNodeBeforeTable = (
  parent: any,
  tableElement: any,
  node: any,
): number => {
  // Same guard as mergeAdjacentTextNodes: a parent without a child list
  // simply cannot contain the table.
  const siblings = parent?.childNodes;
  if (!siblings) {
    return -1;
  }

  const tableIndex = siblings.indexOf(tableElement);
  if (tableIndex === -1) {
    return -1;
  }

  node.parentNode = parent;
  siblings.splice(tableIndex, 0, node);
  return tableIndex;
};
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { createElement, appendChild } from "../dom-simulator/index.js";
|
|
2
|
+
|
|
3
|
+
/** Lower-case tag names of table cell elements. */
export const CELL_ELEMENTS = new Set<string>(["td", "th"]);

/** Lower-case tag names of the table section (row group) elements. */
export const TABLE_SECTION_ELEMENTS = new Set<string>([
  "tbody",
  "thead",
  "tfoot",
]);
|
|
6
|
+
|
|
7
|
+
/**
 * Tells whether wrapper elements have to be created before `childTagName`
 * can be placed under `parentTagName`. Both names are compared
 * case-insensitively.
 *
 * - a cell (`td`/`th`) needs wrappers when the parent is `table` or one of
 *   the table section elements;
 * - a `tr` needs a wrapper when the parent is `table`;
 * - every other combination needs none.
 *
 * @param parentTagName - Tag name of the element the child would be inserted into.
 * @param childTagName - Tag name of the element about to be inserted.
 * @returns `true` when implicit structure is required.
 */
export const shouldCreateImplicitTableStructure = (
  parentTagName: string,
  childTagName: string,
): boolean => {
  const container = parentTagName.toLowerCase();
  const incoming = childTagName.toLowerCase();
  const directlyInTable = container === "table";

  if (CELL_ELEMENTS.has(incoming)) {
    return directlyInTable || TABLE_SECTION_ELEMENTS.has(container);
  }

  return incoming === "tr" && directlyInTable;
};
|
|
24
|
+
|
|
25
|
+
/**
 * Creates the wrapper elements that are missing between the element on top
 * of `stack` and an incoming table child, appends them to the tree and pushes
 * them onto `stack`.
 *
 * - cell (`td`/`th`) under `table`: creates `tbody` then `tr`, returns the `tr`;
 * - cell under a table section: creates `tr`, returns it;
 * - `tr` under `table`: creates `tbody`, returns it;
 * - anything else: creates nothing and returns the current top of `stack`.
 *
 * The new elements are attached to the top of `stack`; `parentTagName` is
 * only used to decide which wrappers are needed.
 *
 * @param stack - Stack of open elements; mutated by pushing the wrappers.
 * @param parentTagName - Tag name of the intended parent (case-insensitive).
 * @param childTagName - Tag name of the element about to be inserted (case-insensitive).
 * @returns The element the incoming child should be appended to.
 */
export const createImplicitTableStructure = (
  stack: any[],
  parentTagName: string,
  childTagName: string,
): any => {
  const container = parentTagName.toLowerCase();
  const incoming = childTagName.toLowerCase();
  const top = stack[stack.length - 1];
  const directlyInTable = container === "table";

  // Creates an attribute-less element under `under` and opens it on the stack.
  const openImplicit = (tag: string, under: any): any => {
    const created = createElement(tag, {});
    appendChild(under, created);
    stack.push(created);
    return created;
  };

  if (CELL_ELEMENTS.has(incoming)) {
    if (directlyInTable) {
      const rowGroup = openImplicit("tbody", top);
      return openImplicit("tr", rowGroup);
    }
    if (TABLE_SECTION_ELEMENTS.has(container)) {
      return openImplicit("tr", top);
    }
    return top;
  }

  if (incoming === "tr" && directlyInTable) {
    return openImplicit("tbody", top);
  }

  return top;
};
|
package/src/parser/parse.ts
CHANGED
|
@@ -21,14 +21,20 @@ import {
|
|
|
21
21
|
VALID_TABLE_CHILDREN,
|
|
22
22
|
VALID_TABLE_SECTION_CHILDREN,
|
|
23
23
|
VALID_TR_CHILDREN,
|
|
24
|
+
BUTTON_SCOPE_TERMINATORS,
|
|
24
25
|
} from "./constants";
|
|
25
26
|
import {
|
|
26
|
-
findFormattingElementInStack,
|
|
27
27
|
findFurthestBlock,
|
|
28
28
|
getCommonAncestor,
|
|
29
29
|
cloneFormattingElement,
|
|
30
30
|
reparentChildren,
|
|
31
31
|
} from "./adoption-agency-helpers.js";
|
|
32
|
+
import {
|
|
33
|
+
shouldCreateImplicitTableStructure,
|
|
34
|
+
createImplicitTableStructure,
|
|
35
|
+
CELL_ELEMENTS,
|
|
36
|
+
} from "./implicit-table-structure.js";
|
|
37
|
+
import { mergeAdjacentTextNodes } from "./foster-parenting-helpers.js";
|
|
32
38
|
|
|
33
39
|
export const parse = (tokens: Token[]): any => {
|
|
34
40
|
const state = createParserState(tokens);
|
|
@@ -327,17 +333,49 @@ const parseTokenInInBodyMode = (state: ParserState, token: Token): void => {
|
|
|
327
333
|
if (token.type === TokenType.TAG_OPEN) {
|
|
328
334
|
const tagName = token.value.toLowerCase();
|
|
329
335
|
|
|
330
|
-
|
|
336
|
+
if (tagName === "a") {
|
|
337
|
+
const existingA = state.activeFormattingElements.find(
|
|
338
|
+
(el) => el && el.tagName && el.tagName.toLowerCase() === "a",
|
|
339
|
+
);
|
|
340
|
+
if (existingA) {
|
|
341
|
+
runAdoptionAgencyAlgorithm(state, "a");
|
|
342
|
+
}
|
|
343
|
+
}
|
|
331
344
|
|
|
332
|
-
|
|
345
|
+
const closedParagraph = handleAutoClosing(state, tagName);
|
|
333
346
|
|
|
334
|
-
const
|
|
347
|
+
const inTableContext = isInTableContext(state);
|
|
348
|
+
const isTableStructureElement =
|
|
349
|
+
CELL_ELEMENTS.has(tagName) ||
|
|
350
|
+
tagName === "tr" ||
|
|
351
|
+
tagName === "tbody" ||
|
|
352
|
+
tagName === "thead" ||
|
|
353
|
+
tagName === "tfoot";
|
|
354
|
+
const currentStackParent = getCurrentParent(state);
|
|
355
|
+
const currentStackParentTag =
|
|
356
|
+
currentStackParent.tagName?.toLowerCase() || "";
|
|
357
|
+
const parentIsTableContext = TABLE_CONTEXT_ELEMENTS.has(
|
|
358
|
+
currentStackParentTag,
|
|
359
|
+
);
|
|
360
|
+
|
|
361
|
+
if (inTableContext && isTableStructureElement) {
|
|
362
|
+
const tableParent = findTableContextParent(state);
|
|
363
|
+
if (tableParent) {
|
|
364
|
+
popStackUntilTableContext(state);
|
|
365
|
+
}
|
|
366
|
+
} else if (!parentIsTableContext && !closedParagraph) {
|
|
367
|
+
reconstructActiveFormattingElements(state);
|
|
368
|
+
}
|
|
369
|
+
|
|
370
|
+
let currentParent = getCurrentParent(state);
|
|
335
371
|
|
|
336
372
|
let namespaceURI: string | undefined;
|
|
337
373
|
if (tagName === "svg") {
|
|
338
374
|
namespaceURI = SVG_NAMESPACE;
|
|
339
375
|
} else if (tagName === "math") {
|
|
340
376
|
namespaceURI = MATHML_NAMESPACE;
|
|
377
|
+
} else {
|
|
378
|
+
namespaceURI = getCurrentNamespace(state);
|
|
341
379
|
}
|
|
342
380
|
|
|
343
381
|
const element = createElement(
|
|
@@ -346,8 +384,8 @@ const parseTokenInInBodyMode = (state: ParserState, token: Token): void => {
|
|
|
346
384
|
namespaceURI,
|
|
347
385
|
);
|
|
348
386
|
|
|
349
|
-
|
|
350
|
-
|
|
387
|
+
let parentTagName = currentParent.tagName || "";
|
|
388
|
+
|
|
351
389
|
const isValidForParent = isValidChildForTableParent(parentTagName, tagName);
|
|
352
390
|
const isHiddenInput =
|
|
353
391
|
tagName === "input" &&
|
|
@@ -355,43 +393,59 @@ const parseTokenInInBodyMode = (state: ParserState, token: Token): void => {
|
|
|
355
393
|
token.attributes.type &&
|
|
356
394
|
token.attributes.type.toLowerCase() === "hidden";
|
|
357
395
|
const isFormInTable = tagName === "form" && inTableContext;
|
|
396
|
+
|
|
397
|
+
const needsImplicitStructure =
|
|
398
|
+
inTableContext &&
|
|
399
|
+
shouldCreateImplicitTableStructure(parentTagName, tagName);
|
|
400
|
+
|
|
358
401
|
const needsFosterParenting =
|
|
359
402
|
inTableContext &&
|
|
360
403
|
TABLE_CONTEXT_ELEMENTS.has(parentTagName.toLowerCase()) &&
|
|
361
404
|
!isValidForParent &&
|
|
362
405
|
!isHiddenInput &&
|
|
363
|
-
!isFormInTable
|
|
406
|
+
!isFormInTable &&
|
|
407
|
+
!needsImplicitStructure;
|
|
364
408
|
|
|
365
|
-
if (
|
|
409
|
+
if (needsImplicitStructure) {
|
|
410
|
+
createImplicitTableStructure(state.stack, parentTagName, tagName);
|
|
411
|
+
appendChild(getCurrentParent(state), element);
|
|
412
|
+
} else if (needsFosterParenting) {
|
|
366
413
|
insertWithFosterParenting(state, element);
|
|
367
414
|
} else {
|
|
368
415
|
appendChild(currentParent, element);
|
|
369
416
|
}
|
|
370
417
|
|
|
418
|
+
const wasFosterParented = needsFosterParenting;
|
|
419
|
+
const isFormattingElement = FORMATTING_ELEMENTS.has(tagName);
|
|
420
|
+
|
|
371
421
|
if (!token.isSelfClosing && !VOID_ELEMENTS.has(tagName)) {
|
|
372
|
-
if (!isFormInTable) {
|
|
422
|
+
if (!isFormInTable && !(wasFosterParented && isFormattingElement)) {
|
|
373
423
|
state.stack.push(element);
|
|
374
424
|
}
|
|
375
425
|
|
|
376
|
-
if (
|
|
377
|
-
state
|
|
426
|
+
if (isFormattingElement) {
|
|
427
|
+
pushToActiveFormattingElements(state, element);
|
|
378
428
|
}
|
|
379
429
|
}
|
|
380
430
|
} else if (token.type === TokenType.TAG_CLOSE) {
|
|
381
431
|
const tagName = token.value.toLowerCase();
|
|
382
432
|
|
|
383
|
-
if (FORMATTING_ELEMENTS.has(tagName)) {
|
|
433
|
+
if (FORMATTING_ELEMENTS.has(tagName) && !isInForeignContent(state)) {
|
|
384
434
|
runAdoptionAgencyAlgorithm(state, tagName);
|
|
385
435
|
return;
|
|
386
436
|
}
|
|
387
437
|
|
|
438
|
+
if (tagName === "p") {
|
|
439
|
+
closeParagraphElement(state);
|
|
440
|
+
return;
|
|
441
|
+
}
|
|
442
|
+
|
|
388
443
|
const impliedEndTags = [
|
|
389
444
|
"dd",
|
|
390
445
|
"dt",
|
|
391
446
|
"li",
|
|
392
447
|
"option",
|
|
393
448
|
"optgroup",
|
|
394
|
-
"p",
|
|
395
449
|
"rb",
|
|
396
450
|
"rp",
|
|
397
451
|
"rt",
|
|
@@ -440,76 +494,132 @@ const runAdoptionAgencyAlgorithm = (
|
|
|
440
494
|
state: ParserState,
|
|
441
495
|
tagName: string,
|
|
442
496
|
): void => {
|
|
443
|
-
const
|
|
497
|
+
const maxIterations = 8;
|
|
444
498
|
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
499
|
+
for (let iteration = 0; iteration < maxIterations; iteration++) {
|
|
500
|
+
const formattingElementIndex = state.activeFormattingElements.findIndex(
|
|
501
|
+
(el) =>
|
|
502
|
+
el && el.tagName && el.tagName.toLowerCase() === tagName.toLowerCase(),
|
|
503
|
+
);
|
|
448
504
|
|
|
449
|
-
|
|
505
|
+
if (formattingElementIndex === -1) {
|
|
506
|
+
return;
|
|
507
|
+
}
|
|
450
508
|
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
state.stack.
|
|
454
|
-
removeFromActiveFormattingElements(state, formattingElement);
|
|
455
|
-
return;
|
|
456
|
-
}
|
|
509
|
+
const formattingElement =
|
|
510
|
+
state.activeFormattingElements[formattingElementIndex];
|
|
511
|
+
const stackIndex = state.stack.indexOf(formattingElement);
|
|
457
512
|
|
|
458
|
-
|
|
513
|
+
if (stackIndex === -1) {
|
|
514
|
+
state.activeFormattingElements.splice(formattingElementIndex, 1);
|
|
515
|
+
return;
|
|
516
|
+
}
|
|
459
517
|
|
|
460
|
-
|
|
461
|
-
|
|
518
|
+
const currentElement = getCurrentElement(state);
|
|
519
|
+
if (currentElement === formattingElement) {
|
|
462
520
|
state.stack.pop();
|
|
521
|
+
removeFromActiveFormattingElements(state, formattingElement);
|
|
522
|
+
return;
|
|
463
523
|
}
|
|
464
|
-
removeFromActiveFormattingElements(state, formattingElement);
|
|
465
|
-
return;
|
|
466
|
-
}
|
|
467
524
|
|
|
468
|
-
|
|
469
|
-
const commonAncestor = getCommonAncestor(state.stack, formattingElementIndex);
|
|
525
|
+
const fbResult = findFurthestBlock(state.stack, stackIndex);
|
|
470
526
|
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
527
|
+
if (!fbResult) {
|
|
528
|
+
while (state.stack.length > stackIndex) {
|
|
529
|
+
state.stack.pop();
|
|
530
|
+
}
|
|
531
|
+
removeFromActiveFormattingElements(state, formattingElement);
|
|
532
|
+
return;
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
const { element: furthestBlock, index: furthestBlockIndex } = fbResult;
|
|
536
|
+
const commonAncestor = getCommonAncestor(state.stack, stackIndex);
|
|
537
|
+
|
|
538
|
+
if (!commonAncestor) {
|
|
539
|
+
return;
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
let lastNode = furthestBlock;
|
|
543
|
+
const clonedNodes: any[] = [];
|
|
544
|
+
const nodesToRemoveFromStack: any[] = [];
|
|
545
|
+
let innerLoopCounter = 0;
|
|
546
|
+
let nodeIndex = furthestBlockIndex;
|
|
547
|
+
|
|
548
|
+
while (true) {
|
|
549
|
+
innerLoopCounter++;
|
|
550
|
+
nodeIndex--;
|
|
551
|
+
const node = state.stack[nodeIndex];
|
|
552
|
+
|
|
553
|
+
if (node === formattingElement) {
|
|
554
|
+
break;
|
|
555
|
+
}
|
|
474
556
|
|
|
475
|
-
|
|
476
|
-
|
|
557
|
+
if (
|
|
558
|
+
innerLoopCounter > 3 &&
|
|
559
|
+
state.activeFormattingElements.includes(node)
|
|
560
|
+
) {
|
|
561
|
+
removeFromActiveFormattingElements(state, node);
|
|
562
|
+
}
|
|
477
563
|
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
564
|
+
if (!state.activeFormattingElements.includes(node)) {
|
|
565
|
+
nodesToRemoveFromStack.push(node);
|
|
566
|
+
continue;
|
|
567
|
+
}
|
|
568
|
+
|
|
569
|
+
const nodeClone = cloneFormattingElement(node);
|
|
570
|
+
clonedNodes.unshift(nodeClone);
|
|
482
571
|
|
|
483
|
-
|
|
572
|
+
replaceInActiveFormattingElements(state, node, nodeClone);
|
|
484
573
|
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
574
|
+
const nodeChildIdx = node.childNodes.indexOf(lastNode);
|
|
575
|
+
if (nodeChildIdx !== -1) {
|
|
576
|
+
node.childNodes.splice(nodeChildIdx, 1);
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
appendChild(nodeClone, lastNode);
|
|
580
|
+
lastNode = nodeClone;
|
|
488
581
|
}
|
|
489
582
|
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
583
|
+
for (const node of nodesToRemoveFromStack) {
|
|
584
|
+
const idx = state.stack.indexOf(node);
|
|
585
|
+
if (idx !== -1) {
|
|
586
|
+
state.stack.splice(idx, 1);
|
|
587
|
+
}
|
|
588
|
+
}
|
|
493
589
|
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
590
|
+
const fbIdx = formattingElement.childNodes.indexOf(furthestBlock);
|
|
591
|
+
if (fbIdx !== -1) {
|
|
592
|
+
formattingElement.childNodes.splice(fbIdx, 1);
|
|
593
|
+
furthestBlock.parentNode = null;
|
|
594
|
+
}
|
|
499
595
|
|
|
500
|
-
|
|
596
|
+
appendChild(commonAncestor, lastNode);
|
|
501
597
|
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
598
|
+
const newFormattingElement = cloneFormattingElement(formattingElement);
|
|
599
|
+
reparentChildren(furthestBlock, newFormattingElement);
|
|
600
|
+
appendChild(furthestBlock, newFormattingElement);
|
|
505
601
|
|
|
506
|
-
|
|
602
|
+
removeFromActiveFormattingElements(state, formattingElement);
|
|
603
|
+
state.activeFormattingElements.splice(
|
|
604
|
+
formattingElementIndex,
|
|
605
|
+
0,
|
|
606
|
+
newFormattingElement,
|
|
607
|
+
);
|
|
608
|
+
|
|
609
|
+
const elementsAfterFurthestBlock = state.stack.slice(
|
|
610
|
+
furthestBlockIndex + 1,
|
|
611
|
+
);
|
|
507
612
|
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
613
|
+
state.stack.length = stackIndex;
|
|
614
|
+
for (const clonedNode of clonedNodes) {
|
|
615
|
+
state.stack.push(clonedNode);
|
|
616
|
+
}
|
|
617
|
+
state.stack.push(furthestBlock);
|
|
618
|
+
state.stack.push(newFormattingElement);
|
|
619
|
+
for (const element of elementsAfterFurthestBlock) {
|
|
620
|
+
state.stack.push(element);
|
|
621
|
+
}
|
|
511
622
|
}
|
|
512
|
-
state.stack.push(furthestBlock);
|
|
513
623
|
};
|
|
514
624
|
|
|
515
625
|
const removeFromActiveFormattingElements = (
|
|
@@ -533,6 +643,60 @@ const replaceInActiveFormattingElements = (
|
|
|
533
643
|
}
|
|
534
644
|
};
|
|
535
645
|
|
|
646
|
+
/**
 * Appends `element` to the list of active formatting elements, keeping at
 * most three equivalent entries after the last marker.
 *
 * The list is scanned from its end back to the last marker (`null`) or the
 * start. Entries count as equivalent when their lower-cased tag name equals
 * that of `element` and `attributesMatch` accepts the pair. When a third
 * equivalent entry is met, that entry - the earliest of the three - is
 * removed before `element` is pushed.
 *
 * NOTE(review): the HTML standard also compares namespaces here; only tag
 * name and attributes are compared.
 *
 * @param state - Parser state whose `activeFormattingElements` list is mutated.
 * @param element - Formatting element to record.
 */
const pushToActiveFormattingElements = (
  state: ParserState,
  element: any,
): void => {
  const list = state.activeFormattingElements;
  const tagName = element.tagName?.toLowerCase();

  let duplicates = 0;

  for (let i = list.length - 1; i >= 0; i--) {
    const entry = list[i];
    if (entry === null) {
      // Marker: entries before it belong to an outer scope.
      break;
    }

    const isDuplicate =
      entry.tagName?.toLowerCase() === tagName &&
      attributesMatch(entry, element);
    if (!isDuplicate) {
      continue;
    }

    duplicates++;
    if (duplicates >= 3) {
      // Scanning backwards, the third hit is the earliest duplicate - the
      // one that has to go.
      list.splice(i, 1);
      break;
    }
  }

  list.push(element);
};
|
|
680
|
+
|
|
681
|
+
/**
 * Compares the `attributes` objects of two elements.
 *
 * A missing `attributes` property is treated as an empty object. The
 * elements match when both have the same number of own enumerable keys and
 * every key of the first is an own key of the second with a strictly equal
 * value. Key order does not matter.
 *
 * @param el1 - First element.
 * @param el2 - Second element.
 * @returns `true` when both attribute sets are identical.
 */
const attributesMatch = (el1: any, el2: any): boolean => {
  const attrs1 = el1.attributes || {};
  const attrs2 = el2.attributes || {};
  const keys1 = Object.keys(attrs1);

  if (keys1.length !== Object.keys(attrs2).length) {
    return false;
  }

  // The own-key check keeps a key that is absent from attrs2 from comparing
  // equal to a key whose value in attrs1 is `undefined`.
  return keys1.every(
    (key) =>
      Object.prototype.hasOwnProperty.call(attrs2, key) &&
      attrs1[key] === attrs2[key],
  );
};
|
|
699
|
+
|
|
536
700
|
const parseText = (state: ParserState, token: Token): void => {
|
|
537
701
|
const content = token.value;
|
|
538
702
|
|
|
@@ -541,8 +705,6 @@ const parseText = (state: ParserState, token: Token): void => {
|
|
|
541
705
|
return;
|
|
542
706
|
}
|
|
543
707
|
|
|
544
|
-
reconstructActiveFormattingElements(state);
|
|
545
|
-
|
|
546
708
|
const textNode = createTextNode(content);
|
|
547
709
|
|
|
548
710
|
const inTableContext = isInTableContext(state);
|
|
@@ -552,9 +714,10 @@ const parseText = (state: ParserState, token: Token): void => {
|
|
|
552
714
|
currentParent.tagName &&
|
|
553
715
|
TABLE_CONTEXT_ELEMENTS.has(currentParent.tagName.toLowerCase())
|
|
554
716
|
) {
|
|
555
|
-
|
|
717
|
+
insertWithFosterParentingAndReconstruct(state, textNode);
|
|
556
718
|
} else {
|
|
557
|
-
|
|
719
|
+
reconstructActiveFormattingElements(state);
|
|
720
|
+
appendChild(getCurrentParent(state), textNode);
|
|
558
721
|
}
|
|
559
722
|
};
|
|
560
723
|
|
|
@@ -583,18 +746,57 @@ const parseProcessingInstruction = (state: ParserState, token: Token): void => {
|
|
|
583
746
|
appendChild(currentParent, piNode);
|
|
584
747
|
};
|
|
585
748
|
|
|
586
|
-
const
|
|
749
|
+
/**
 * Handles a closing `</p>`: looks for the nearest open `p` on the stack of
 * open elements and, when found, removes it and everything opened after it
 * from the stack.
 *
 * The search runs from the innermost element outwards and gives up (leaving
 * the stack untouched) as soon as it meets a tag listed in
 * `BUTTON_SCOPE_TERMINATORS`, or when no `p` is open at all.
 *
 * @param state - Parser state whose `stack` may be truncated.
 */
const closeParagraphElement = (state: ParserState): void => {
  const openElements = state.stack;

  let cursor = openElements.length;
  while (cursor > 0) {
    cursor -= 1;
    const tag = openElements[cursor].tagName?.toLowerCase();

    if (tag === "p") {
      // Drop the paragraph together with everything nested inside it.
      while (openElements.length > cursor) {
        openElements.pop();
      }
      return;
    }

    if (tag && BUTTON_SCOPE_TERMINATORS.has(tag)) {
      return;
    }
  }
};
|
|
773
|
+
|
|
774
|
+
const handleAutoClosing = (state: ParserState, tagName: string): boolean => {
|
|
587
775
|
const autoCloseList = AUTO_CLOSE_RULES[tagName];
|
|
588
|
-
if (!autoCloseList) return;
|
|
776
|
+
if (!autoCloseList) return false;
|
|
589
777
|
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
778
|
+
let targetIndex = -1;
|
|
779
|
+
for (let i = state.stack.length - 1; i >= 0; i--) {
|
|
780
|
+
const element = state.stack[i];
|
|
781
|
+
const elementTag = element.tagName?.toLowerCase();
|
|
782
|
+
|
|
783
|
+
if (elementTag && autoCloseList.includes(elementTag)) {
|
|
784
|
+
targetIndex = i;
|
|
785
|
+
break;
|
|
786
|
+
}
|
|
787
|
+
|
|
788
|
+
if (elementTag && BUTTON_SCOPE_TERMINATORS.has(elementTag)) {
|
|
789
|
+
return false;
|
|
790
|
+
}
|
|
791
|
+
}
|
|
792
|
+
|
|
793
|
+
if (targetIndex === -1) return false;
|
|
794
|
+
|
|
795
|
+
while (state.stack.length > targetIndex) {
|
|
596
796
|
state.stack.pop();
|
|
597
797
|
}
|
|
798
|
+
|
|
799
|
+
return true;
|
|
598
800
|
};
|
|
599
801
|
|
|
600
802
|
const getCurrentParent = (state: ParserState): any => {
|
|
@@ -710,6 +912,57 @@ const isInTableContext = (state: ParserState): boolean => {
|
|
|
710
912
|
return false;
|
|
711
913
|
};
|
|
712
914
|
|
|
915
|
+
/**
 * Reports whether an SVG or MathML element is open.
 *
 * The stack of open elements is walked from the innermost entry outwards.
 * The first entry whose `namespaceURI` is the SVG or MathML namespace yields
 * `true`; reaching an entry whose tag name is `html` (case-insensitive)
 * first, or running out of entries, yields `false`.
 *
 * @param state - Parser state providing the stack of open elements.
 * @returns `true` when a foreign element is found before `html`.
 */
const isInForeignContent = (state: ParserState): boolean => {
  const openElements = state.stack;

  let depth = openElements.length;
  while (depth > 0) {
    depth -= 1;
    const candidate = openElements[depth];
    const ns = candidate.namespaceURI;

    if (ns === SVG_NAMESPACE || ns === MATHML_NAMESPACE) {
      return true;
    }

    const isHtmlRoot =
      candidate.tagName && candidate.tagName.toLowerCase() === "html";
    if (isHtmlRoot) {
      return false;
    }
  }

  return false;
};
|
|
930
|
+
|
|
931
|
+
/**
 * Returns the `namespaceURI` of the innermost open element that has a truthy
 * one, or `undefined` when no open element carries a namespace.
 *
 * @param state - Parser state providing the stack of open elements.
 */
const getCurrentNamespace = (state: ParserState): string | undefined => {
  const openElements = state.stack;

  let depth = openElements.length;
  while (depth > 0) {
    depth -= 1;
    const candidate = openElements[depth];
    if (candidate.namespaceURI) {
      return candidate.namespaceURI;
    }
  }

  return undefined;
};
|
|
940
|
+
|
|
941
|
+
/**
 * Finds the innermost open element whose lower-cased tag name is listed in
 * `TABLE_CONTEXT_ELEMENTS`.
 *
 * @param state - Parser state providing the stack of open elements.
 * @returns That element, or `null` when none is open.
 */
const findTableContextParent = (state: ParserState): any | null => {
  const openElements = state.stack;

  let depth = openElements.length;
  while (depth > 0) {
    depth -= 1;
    const candidate = openElements[depth];
    const isTableContext =
      candidate.tagName &&
      TABLE_CONTEXT_ELEMENTS.has(candidate.tagName.toLowerCase());
    if (isTableContext) {
      return candidate;
    }
  }

  return null;
};
|
|
950
|
+
|
|
951
|
+
/**
 * Pops open elements until the current element is one listed in
 * `TABLE_CONTEXT_ELEMENTS` (or only one entry is left on the stack), then
 * pushes a `null` marker onto the list of active formatting elements.
 *
 * The marker is pushed unconditionally, even when nothing was popped.
 *
 * @param state - Parser state; both `stack` and `activeFormattingElements` are mutated.
 */
const popStackUntilTableContext = (state: ParserState): void => {
  // True when the element currently on top of the stack is a table-context element.
  const topIsTableContext = (): boolean => {
    const top = getCurrentElement(state);
    return Boolean(
      top &&
        top.tagName &&
        TABLE_CONTEXT_ELEMENTS.has(top.tagName.toLowerCase()),
    );
  };

  while (state.stack.length > 1 && !topIsTableContext()) {
    state.stack.pop();
  }

  state.activeFormattingElements.push(null);
};
|
|
965
|
+
|
|
713
966
|
const isValidChildForTableParent = (
|
|
714
967
|
parentTagName: string,
|
|
715
968
|
childTagName: string,
|
|
@@ -760,13 +1013,102 @@ const insertWithFosterParenting = (state: ParserState, node: any): void => {
|
|
|
760
1013
|
if (idx !== -1) {
|
|
761
1014
|
node.parentNode = target.parent;
|
|
762
1015
|
target.parent.childNodes.splice(idx, 0, node);
|
|
1016
|
+
if (node.nodeType === 3) {
|
|
1017
|
+
mergeAdjacentTextNodes(target.parent, idx);
|
|
1018
|
+
}
|
|
763
1019
|
return;
|
|
764
1020
|
}
|
|
765
1021
|
}
|
|
766
1022
|
appendChild(target.parent, node);
|
|
1023
|
+
if (node.nodeType === 3) {
|
|
1024
|
+
const insertedIdx = target.parent.childNodes.indexOf(node);
|
|
1025
|
+
if (insertedIdx !== -1) {
|
|
1026
|
+
mergeAdjacentTextNodes(target.parent, insertedIdx);
|
|
1027
|
+
}
|
|
1028
|
+
}
|
|
767
1029
|
return;
|
|
768
1030
|
}
|
|
769
1031
|
}
|
|
770
1032
|
|
|
771
1033
|
appendChild(currentParent, node);
|
|
772
1034
|
};
|
|
1035
|
+
|
|
1036
|
+
/**
 * Foster-parents `node` and, when formatting elements are pending, wraps it
 * in them first.
 *
 * Steps, in order:
 * 1. Without a foster-parent target, `node` is appended to the current parent.
 * 2. Without pending formatting elements (as reported by
 *    `getActiveFormattingElementsBeforeMarker`), `node` is inserted before
 *    `target.before` - or appended to `target.parent` when there is no usable
 *    anchor - and, if it is a text node, merged with adjacent text nodes.
 * 3. If the active formatting list holds no marker and the last pending
 *    formatting element already sits in `target.parent` ahead of
 *    `target.before`, `node` is appended to that element.
 * 4. Otherwise every pending formatting element is cloned, the clones are
 *    nested with the last pending element innermost around `node`, and the
 *    outermost clone is inserted like in step 2, without text merging.
 *
 * @param state - Parser state (stack and active formatting elements are read).
 * @param node - Node to insert.
 */
const insertWithFosterParentingAndReconstruct = (
  state: ParserState,
  node: any,
): void => {
  const target = findFosterParentTarget(state);
  if (!target) {
    appendChild(getCurrentParent(state), node);
    return;
  }

  // Inserts `incoming` before the foster anchor, or appends it to the foster
  // parent when no anchor is available; optionally merges adjacent text.
  const placeAtFosterTarget = (incoming: any, mergeText: boolean): void => {
    const anchorIndex = target.before
      ? target.parent.childNodes.indexOf(target.before)
      : -1;

    if (anchorIndex !== -1) {
      incoming.parentNode = target.parent;
      target.parent.childNodes.splice(anchorIndex, 0, incoming);
      if (mergeText && incoming.nodeType === 3) {
        mergeAdjacentTextNodes(target.parent, anchorIndex);
      }
      return;
    }

    appendChild(target.parent, incoming);
    if (mergeText && incoming.nodeType === 3) {
      const landedAt = target.parent.childNodes.indexOf(incoming);
      if (landedAt !== -1) {
        mergeAdjacentTextNodes(target.parent, landedAt);
      }
    }
  };

  const pending = getActiveFormattingElementsBeforeMarker(state);
  if (pending.length === 0) {
    placeAtFosterTarget(node, true);
    return;
  }

  const markerPresent = state.activeFormattingElements.includes(null);
  const innermost = pending[pending.length - 1];

  // Reuse an already foster-parented formatting element instead of cloning.
  const canReuseInnermost =
    !markerPresent &&
    innermost.parentNode === target.parent &&
    target.parent.childNodes.indexOf(innermost) <
      target.parent.childNodes.indexOf(target.before);
  if (canReuseInnermost) {
    appendChild(innermost, node);
    return;
  }

  // Wrap from the inside out: the last pending element ends up innermost.
  const wrapped = pending.reduceRight((inner: any, formatEl: any) => {
    const clone = cloneFormattingElement(formatEl);
    appendChild(clone, inner);
    return clone;
  }, node);

  placeAtFosterTarget(wrapped, false);
};
|
|
1101
|
+
|
|
1102
|
+
/**
 * Collects, in list order, every active formatting element that is not
 * currently on the stack of open elements.
 *
 * Markers (`null` entries) are skipped; they do not stop the scan, so
 * entries on both sides of a marker are considered.
 *
 * @param state - Parser state providing the list and the stack.
 * @returns A new array with the matching elements.
 */
const getActiveFormattingElementsBeforeMarker = (state: ParserState): any[] => {
  const collected: any[] = [];

  for (const entry of state.activeFormattingElements) {
    const isMarker = entry === null;
    if (!isMarker && !isInStack(state.stack, entry)) {
      collected.push(entry);
    }
  }

  return collected;
};
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import { it, expect } from "bun:test";
|
|
2
|
+
import { parseHTML } from "../index.js";
|
|
3
|
+
import { serializeToHtml5lib } from "./helpers/tree-adapter.js";
|
|
4
|
+
|
|
5
|
+
it("should run AAA 2 times - test case with nested divs", () => {
|
|
6
|
+
const html = "<a>1<div>2<div>3</a>4</div>5</div>";
|
|
7
|
+
const doc = parseHTML(html);
|
|
8
|
+
const serialized = serializeToHtml5lib(doc, { skipImplicitDoctype: true });
|
|
9
|
+
|
|
10
|
+
const expected = `| <html>
|
|
11
|
+
| <head>
|
|
12
|
+
| <body>
|
|
13
|
+
| <a>
|
|
14
|
+
| "1"
|
|
15
|
+
| <div>
|
|
16
|
+
| <a>
|
|
17
|
+
| "2"
|
|
18
|
+
| <div>
|
|
19
|
+
| <a>
|
|
20
|
+
| "3"
|
|
21
|
+
| "4"
|
|
22
|
+
| "5"
|
|
23
|
+
`;
|
|
24
|
+
|
|
25
|
+
expect(serialized).toBe(expected);
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
it("should run AAA 8 times - deeply nested divs", () => {
|
|
29
|
+
const html =
|
|
30
|
+
"<div><a><b><div><div><div><div><div><div><div><div><div><div></a>";
|
|
31
|
+
const doc = parseHTML(html);
|
|
32
|
+
const serialized = serializeToHtml5lib(doc, { skipImplicitDoctype: true });
|
|
33
|
+
|
|
34
|
+
const expected = `| <html>
|
|
35
|
+
| <head>
|
|
36
|
+
| <body>
|
|
37
|
+
| <div>
|
|
38
|
+
| <a>
|
|
39
|
+
| <b>
|
|
40
|
+
| <b>
|
|
41
|
+
| <div>
|
|
42
|
+
| <a>
|
|
43
|
+
| <div>
|
|
44
|
+
| <a>
|
|
45
|
+
| <div>
|
|
46
|
+
| <a>
|
|
47
|
+
| <div>
|
|
48
|
+
| <a>
|
|
49
|
+
| <div>
|
|
50
|
+
| <a>
|
|
51
|
+
| <div>
|
|
52
|
+
| <a>
|
|
53
|
+
| <div>
|
|
54
|
+
| <a>
|
|
55
|
+
| <div>
|
|
56
|
+
| <a>
|
|
57
|
+
| <div>
|
|
58
|
+
| <div>
|
|
59
|
+
`;
|
|
60
|
+
|
|
61
|
+
expect(serialized).toBe(expected);
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
it("should run AAA 2 times - with style and address elements", () => {
|
|
65
|
+
const html = "<a><div><style></style><address><a>";
|
|
66
|
+
const doc = parseHTML(html);
|
|
67
|
+
const serialized = serializeToHtml5lib(doc, { skipImplicitDoctype: true });
|
|
68
|
+
|
|
69
|
+
const expected = `| <html>
|
|
70
|
+
| <head>
|
|
71
|
+
| <body>
|
|
72
|
+
| <a>
|
|
73
|
+
| <div>
|
|
74
|
+
| <a>
|
|
75
|
+
| <style>
|
|
76
|
+
| <address>
|
|
77
|
+
| <a>
|
|
78
|
+
| <a>
|
|
79
|
+
`;
|
|
80
|
+
|
|
81
|
+
expect(serialized).toBe(expected);
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
it("should run AAA with formatting element cloning", () => {
|
|
85
|
+
const html = "<a>x<div>y</a>z</div>";
|
|
86
|
+
const doc = parseHTML(html);
|
|
87
|
+
const serialized = serializeToHtml5lib(doc, { skipImplicitDoctype: true });
|
|
88
|
+
|
|
89
|
+
const expected = `| <html>
|
|
90
|
+
| <head>
|
|
91
|
+
| <body>
|
|
92
|
+
| <a>
|
|
93
|
+
| "x"
|
|
94
|
+
| <div>
|
|
95
|
+
| <a>
|
|
96
|
+
| "y"
|
|
97
|
+
| "z"
|
|
98
|
+
`;
|
|
99
|
+
|
|
100
|
+
expect(serialized).toBe(expected);
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
it("should stop AAA when no more formatting elements to adopt", () => {
|
|
104
|
+
const html = "<b>text</b><div>content</div>";
|
|
105
|
+
const doc = parseHTML(html);
|
|
106
|
+
const serialized = serializeToHtml5lib(doc, { skipImplicitDoctype: true });
|
|
107
|
+
|
|
108
|
+
const expected = `| <html>
|
|
109
|
+
| <head>
|
|
110
|
+
| <body>
|
|
111
|
+
| <b>
|
|
112
|
+
| "text"
|
|
113
|
+
| <div>
|
|
114
|
+
| "content"
|
|
115
|
+
`;
|
|
116
|
+
|
|
117
|
+
expect(serialized).toBe(expected);
|
|
118
|
+
});
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import { expect, it, describe } from "bun:test";
|
|
2
|
+
import { parseHTML } from "../index";
|
|
3
|
+
import { serializeToHtml5lib } from "./helpers/tree-adapter";
|
|
4
|
+
|
|
5
|
+
describe("Foster Parenting", () => {
|
|
6
|
+
describe("Text foster parenting", () => {
|
|
7
|
+
it("should foster parent text before table and merge adjacent text nodes", () => {
|
|
8
|
+
const doc = parseHTML("<table>A<td>B</td>C</table>");
|
|
9
|
+
const serialized = serializeToHtml5lib(doc, {
|
|
10
|
+
skipImplicitDoctype: true,
|
|
11
|
+
});
|
|
12
|
+
expect(serialized).toBe(`| <html>
|
|
13
|
+
| <head>
|
|
14
|
+
| <body>
|
|
15
|
+
| "AC"
|
|
16
|
+
| <table>
|
|
17
|
+
| <tbody>
|
|
18
|
+
| <tr>
|
|
19
|
+
| <td>
|
|
20
|
+
| "B"
|
|
21
|
+
`);
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
it("should foster parent text with whitespace correctly", () => {
|
|
25
|
+
const doc = parseHTML("<table> X </table>");
|
|
26
|
+
const serialized = serializeToHtml5lib(doc, {
|
|
27
|
+
skipImplicitDoctype: true,
|
|
28
|
+
});
|
|
29
|
+
expect(serialized).toBe(`| <html>
|
|
30
|
+
| <head>
|
|
31
|
+
| <body>
|
|
32
|
+
| " X "
|
|
33
|
+
| <table>
|
|
34
|
+
`);
|
|
35
|
+
});
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
describe("Element foster parenting", () => {
|
|
39
|
+
it("should foster parent <a> before table with AAA reconstruction", () => {
|
|
40
|
+
const doc = parseHTML("<table><a>1<td>2</td>3</table>");
|
|
41
|
+
const serialized = serializeToHtml5lib(doc, {
|
|
42
|
+
skipImplicitDoctype: true,
|
|
43
|
+
});
|
|
44
|
+
expect(serialized).toBe(`| <html>
|
|
45
|
+
| <head>
|
|
46
|
+
| <body>
|
|
47
|
+
| <a>
|
|
48
|
+
| "1"
|
|
49
|
+
| <a>
|
|
50
|
+
| "3"
|
|
51
|
+
| <table>
|
|
52
|
+
| <tbody>
|
|
53
|
+
| <tr>
|
|
54
|
+
| <td>
|
|
55
|
+
| "2"
|
|
56
|
+
`);
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
it("should foster parent elements with AAA for formatting in <p>", () => {
|
|
60
|
+
const doc = parseHTML("<table><a>1<p>2</a>3</p>");
|
|
61
|
+
const serialized = serializeToHtml5lib(doc, {
|
|
62
|
+
skipImplicitDoctype: true,
|
|
63
|
+
});
|
|
64
|
+
expect(serialized).toBe(`| <html>
|
|
65
|
+
| <head>
|
|
66
|
+
| <body>
|
|
67
|
+
| <a>
|
|
68
|
+
| "1"
|
|
69
|
+
| <p>
|
|
70
|
+
| <a>
|
|
71
|
+
| "2"
|
|
72
|
+
| "3"
|
|
73
|
+
| <table>
|
|
74
|
+
`);
|
|
75
|
+
});
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
describe("Implicit table structure", () => {
|
|
79
|
+
it("should create implicit tbody and tr for td in table", () => {
|
|
80
|
+
const doc = parseHTML("<table><td>X</td></table>");
|
|
81
|
+
const serialized = serializeToHtml5lib(doc, {
|
|
82
|
+
skipImplicitDoctype: true,
|
|
83
|
+
});
|
|
84
|
+
expect(serialized).toBe(`| <html>
|
|
85
|
+
| <head>
|
|
86
|
+
| <body>
|
|
87
|
+
| <table>
|
|
88
|
+
| <tbody>
|
|
89
|
+
| <tr>
|
|
90
|
+
| <td>
|
|
91
|
+
| "X"
|
|
92
|
+
`);
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
it("should create implicit tr for td in tbody", () => {
|
|
96
|
+
const doc = parseHTML("<table><tbody><td>X</td></tbody></table>");
|
|
97
|
+
const serialized = serializeToHtml5lib(doc, {
|
|
98
|
+
skipImplicitDoctype: true,
|
|
99
|
+
});
|
|
100
|
+
expect(serialized).toBe(`| <html>
|
|
101
|
+
| <head>
|
|
102
|
+
| <body>
|
|
103
|
+
| <table>
|
|
104
|
+
| <tbody>
|
|
105
|
+
| <tr>
|
|
106
|
+
| <td>
|
|
107
|
+
| "X"
|
|
108
|
+
`);
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
it("should not create implicit structure when tr is present", () => {
|
|
112
|
+
const doc = parseHTML("<table><tr><td>X</td></tr></table>");
|
|
113
|
+
const serialized = serializeToHtml5lib(doc, {
|
|
114
|
+
skipImplicitDoctype: true,
|
|
115
|
+
});
|
|
116
|
+
expect(serialized).toBe(`| <html>
|
|
117
|
+
| <head>
|
|
118
|
+
| <body>
|
|
119
|
+
| <table>
|
|
120
|
+
| <tbody>
|
|
121
|
+
| <tr>
|
|
122
|
+
| <td>
|
|
123
|
+
| "X"
|
|
124
|
+
`);
|
|
125
|
+
});
|
|
126
|
+
});
|
|
127
|
+
});
|
|
@@ -25,12 +25,12 @@ export function serializeToHtml5lib(
|
|
|
25
25
|
|
|
26
26
|
let nsPrefix = "";
|
|
27
27
|
if (ns === "http://www.w3.org/2000/svg") {
|
|
28
|
-
nsPrefix = "
|
|
28
|
+
nsPrefix = "svg ";
|
|
29
29
|
} else if (ns === "http://www.w3.org/1998/Math/MathML") {
|
|
30
|
-
nsPrefix = "
|
|
30
|
+
nsPrefix = "math ";
|
|
31
31
|
}
|
|
32
32
|
|
|
33
|
-
lines.push(`${indent}<${
|
|
33
|
+
lines.push(`${indent}<${nsPrefix}${tagName}>`);
|
|
34
34
|
|
|
35
35
|
// Atributos en orden alfabético
|
|
36
36
|
const attrs = Object.entries(node.attributes || {}).sort(([a], [b]) =>
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import { describe, it, expect } from "bun:test";
|
|
2
|
+
import { parseHTML } from "../index.js";
|
|
3
|
+
import { serializeToHtml5lib } from "./helpers/tree-adapter.js";
|
|
4
|
+
|
|
5
|
+
describe("implicit close with formatting element reconstruction", () => {
|
|
6
|
+
it("should close <p> and reconstruct <b> elements when new <p> opens", () => {
|
|
7
|
+
const html = "<p><b><b><b><b><p>x";
|
|
8
|
+
const doc = parseHTML(html);
|
|
9
|
+
const result = serializeToHtml5lib(doc, { skipImplicitDoctype: true });
|
|
10
|
+
|
|
11
|
+
expect(result).toBe(`| <html>
|
|
12
|
+
| <head>
|
|
13
|
+
| <body>
|
|
14
|
+
| <p>
|
|
15
|
+
| <b>
|
|
16
|
+
| <b>
|
|
17
|
+
| <b>
|
|
18
|
+
| <b>
|
|
19
|
+
| <p>
|
|
20
|
+
| <b>
|
|
21
|
+
| <b>
|
|
22
|
+
| <b>
|
|
23
|
+
| "x"
|
|
24
|
+
`);
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
it("should close <p> through nested formatting and reconstruct (single <b>)", () => {
|
|
28
|
+
const html = "<p><b><p>x";
|
|
29
|
+
const doc = parseHTML(html);
|
|
30
|
+
const result = serializeToHtml5lib(doc, { skipImplicitDoctype: true });
|
|
31
|
+
|
|
32
|
+
expect(result).toBe(`| <html>
|
|
33
|
+
| <head>
|
|
34
|
+
| <body>
|
|
35
|
+
| <p>
|
|
36
|
+
| <b>
|
|
37
|
+
| <p>
|
|
38
|
+
| <b>
|
|
39
|
+
| "x"
|
|
40
|
+
`);
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
it("should handle text before and after implicit close", () => {
|
|
44
|
+
const html = "<p><b>1<p>2";
|
|
45
|
+
const doc = parseHTML(html);
|
|
46
|
+
const result = serializeToHtml5lib(doc, { skipImplicitDoctype: true });
|
|
47
|
+
|
|
48
|
+
expect(result).toBe(`| <html>
|
|
49
|
+
| <head>
|
|
50
|
+
| <body>
|
|
51
|
+
| <p>
|
|
52
|
+
| <b>
|
|
53
|
+
| "1"
|
|
54
|
+
| <p>
|
|
55
|
+
| <b>
|
|
56
|
+
| "2"
|
|
57
|
+
`);
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
it("should handle multiple different formatting elements", () => {
|
|
61
|
+
const html = "<p><b><i><p>x";
|
|
62
|
+
const doc = parseHTML(html);
|
|
63
|
+
const result = serializeToHtml5lib(doc, { skipImplicitDoctype: true });
|
|
64
|
+
|
|
65
|
+
expect(result).toBe(`| <html>
|
|
66
|
+
| <head>
|
|
67
|
+
| <body>
|
|
68
|
+
| <p>
|
|
69
|
+
| <b>
|
|
70
|
+
| <i>
|
|
71
|
+
| <p>
|
|
72
|
+
| <b>
|
|
73
|
+
| <i>
|
|
74
|
+
| "x"
|
|
75
|
+
`);
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
it("should handle div closing <p> and reconstructing formatting", () => {
|
|
79
|
+
const html = "<p><b><div>x";
|
|
80
|
+
const doc = parseHTML(html);
|
|
81
|
+
const result = serializeToHtml5lib(doc, { skipImplicitDoctype: true });
|
|
82
|
+
|
|
83
|
+
expect(result).toBe(`| <html>
|
|
84
|
+
| <head>
|
|
85
|
+
| <body>
|
|
86
|
+
| <p>
|
|
87
|
+
| <b>
|
|
88
|
+
| <div>
|
|
89
|
+
| <b>
|
|
90
|
+
| "x"
|
|
91
|
+
`);
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
it("should handle multiple auto-closing with formatting", () => {
|
|
95
|
+
const html = "<p><b><p><i><p>x";
|
|
96
|
+
const doc = parseHTML(html);
|
|
97
|
+
const result = serializeToHtml5lib(doc, { skipImplicitDoctype: true });
|
|
98
|
+
|
|
99
|
+
expect(result).toBe(`| <html>
|
|
100
|
+
| <head>
|
|
101
|
+
| <body>
|
|
102
|
+
| <p>
|
|
103
|
+
| <b>
|
|
104
|
+
| <p>
|
|
105
|
+
| <b>
|
|
106
|
+
| <i>
|
|
107
|
+
| <p>
|
|
108
|
+
| <b>
|
|
109
|
+
| <i>
|
|
110
|
+
| "x"
|
|
111
|
+
`);
|
|
112
|
+
});
|
|
113
|
+
});
|
|
@@ -31,7 +31,9 @@ describe("Tree Construction Adoption01 Tests", () => {
|
|
|
31
31
|
}
|
|
32
32
|
}
|
|
33
33
|
|
|
34
|
-
const passingTests = [
|
|
34
|
+
const passingTests = [
|
|
35
|
+
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
|
|
36
|
+
];
|
|
35
37
|
const testFn = passingTests.includes(index + 1) ? it : it.skip;
|
|
36
38
|
|
|
37
39
|
testFn(`Adoption test ${index + 1}`, () => {
|
|
@@ -9,7 +9,7 @@ describe("Tree Construction Adoption02 Tests", () => {
|
|
|
9
9
|
"utf8",
|
|
10
10
|
);
|
|
11
11
|
const sections = content.split("#data\n").slice(1);
|
|
12
|
-
const passingTests = [1];
|
|
12
|
+
const passingTests = [1, 2];
|
|
13
13
|
|
|
14
14
|
sections.forEach((section, index) => {
|
|
15
15
|
const lines = section.trim().split("\n");
|