@tkeron/html-parser 1.1.2 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/npm_deploy.yml +14 -4
- package/README.md +6 -6
- package/bun.lock +6 -8
- package/check-versions.ts +147 -0
- package/index.ts +4 -8
- package/package.json +5 -6
- package/src/dom-simulator/append-child.ts +130 -0
- package/src/dom-simulator/append.ts +18 -0
- package/src/dom-simulator/attributes.ts +23 -0
- package/src/dom-simulator/clone-node.ts +51 -0
- package/src/dom-simulator/convert-ast-node-to-dom.ts +37 -0
- package/src/dom-simulator/create-cdata.ts +18 -0
- package/src/dom-simulator/create-comment.ts +23 -0
- package/src/dom-simulator/create-doctype.ts +24 -0
- package/src/dom-simulator/create-document.ts +81 -0
- package/src/dom-simulator/create-element.ts +195 -0
- package/src/dom-simulator/create-processing-instruction.ts +19 -0
- package/src/dom-simulator/create-temp-parent.ts +9 -0
- package/src/dom-simulator/create-text-node.ts +23 -0
- package/src/dom-simulator/escape-text-content.ts +6 -0
- package/src/dom-simulator/find-special-elements.ts +14 -0
- package/src/dom-simulator/get-text-content.ts +18 -0
- package/src/dom-simulator/index.ts +36 -0
- package/src/dom-simulator/inner-outer-html.ts +182 -0
- package/src/dom-simulator/insert-after.ts +20 -0
- package/src/dom-simulator/insert-before.ts +108 -0
- package/src/dom-simulator/matches.ts +26 -0
- package/src/dom-simulator/node-types.ts +26 -0
- package/src/dom-simulator/prepend.ts +24 -0
- package/src/dom-simulator/remove-child.ts +68 -0
- package/src/dom-simulator/remove.ts +7 -0
- package/src/dom-simulator/replace-child.ts +152 -0
- package/src/dom-simulator/set-text-content.ts +33 -0
- package/src/dom-simulator/update-element-content.ts +56 -0
- package/src/dom-simulator.ts +12 -1126
- package/src/encoding/constants.ts +8 -0
- package/src/encoding/detect-encoding.ts +21 -0
- package/src/encoding/index.ts +1 -0
- package/src/encoding/normalize-encoding.ts +6 -0
- package/src/html-entities.ts +2127 -0
- package/src/index.ts +5 -5
- package/src/parser/adoption-agency-helpers.ts +145 -0
- package/src/parser/constants.ts +137 -0
- package/src/parser/dom-to-ast.ts +79 -0
- package/src/parser/index.ts +9 -0
- package/src/parser/parse.ts +772 -0
- package/src/parser/types.ts +56 -0
- package/src/selectors/find-elements-descendant.ts +47 -0
- package/src/selectors/index.ts +2 -0
- package/src/selectors/matches-selector.ts +12 -0
- package/src/selectors/matches-token.ts +27 -0
- package/src/selectors/parse-selector.ts +48 -0
- package/src/selectors/query-selector-all.ts +43 -0
- package/src/selectors/query-selector.ts +6 -0
- package/src/selectors/types.ts +10 -0
- package/src/serializer/attributes.ts +74 -0
- package/src/serializer/escape.ts +13 -0
- package/src/serializer/index.ts +1 -0
- package/src/serializer/serialize-tokens.ts +511 -0
- package/src/tokenizer/calculate-position.ts +10 -0
- package/src/tokenizer/constants.ts +11 -0
- package/src/tokenizer/decode-entities.ts +64 -0
- package/src/tokenizer/index.ts +2 -0
- package/src/tokenizer/parse-attributes.ts +74 -0
- package/src/tokenizer/tokenize.ts +165 -0
- package/src/tokenizer/types.ts +25 -0
- package/tests/adoption-agency-helpers.test.ts +304 -0
- package/tests/advanced.test.ts +242 -221
- package/tests/cloneNode.test.ts +19 -66
- package/tests/custom-elements-head.test.ts +54 -55
- package/tests/dom-extended.test.ts +77 -64
- package/tests/dom-manipulation.test.ts +51 -24
- package/tests/dom.test.ts +15 -13
- package/tests/encoding/detect-encoding.test.ts +33 -0
- package/tests/google-dom.test.ts +2 -2
- package/tests/helpers/tokenizer-adapter.test.ts +29 -43
- package/tests/helpers/tokenizer-adapter.ts +36 -33
- package/tests/helpers/tree-adapter.test.ts +20 -20
- package/tests/helpers/tree-adapter.ts +34 -24
- package/tests/html-entities-text.test.ts +6 -2
- package/tests/innerhtml-void-elements.test.ts +52 -36
- package/tests/outerHTML-replacement.test.ts +37 -65
- package/tests/parser/dom-to-ast.test.ts +109 -0
- package/tests/parser/parse.test.ts +139 -0
- package/tests/parser.test.ts +281 -217
- package/tests/selectors/query-selector-all.test.ts +39 -0
- package/tests/selectors/query-selector.test.ts +42 -0
- package/tests/serializer/attributes.test.ts +132 -0
- package/tests/serializer/escape.test.ts +51 -0
- package/tests/serializer/serialize-tokens.test.ts +80 -0
- package/tests/serializer-core.test.ts +6 -6
- package/tests/serializer-injectmeta.test.ts +6 -6
- package/tests/serializer-optionaltags.test.ts +9 -6
- package/tests/serializer-options.test.ts +6 -6
- package/tests/serializer-whitespace.test.ts +6 -6
- package/tests/tokenizer/calculate-position.test.ts +34 -0
- package/tests/tokenizer/decode-entities.test.ts +31 -0
- package/tests/tokenizer/parse-attributes.test.ts +44 -0
- package/tests/tokenizer/tokenize.test.ts +757 -0
- package/tests/tokenizer-namedEntities.test.ts +10 -7
- package/tests/tokenizer-pendingSpecChanges.test.ts +10 -7
- package/tests/tokenizer.test.ts +268 -256
- package/tests/tree-construction-adoption01.test.ts +25 -16
- package/tests/tree-construction-adoption02.test.ts +30 -19
- package/tests/tree-construction-domjs-unsafe.test.ts +6 -4
- package/tests/tree-construction-entities02.test.ts +18 -16
- package/tests/tree-construction-html5test-com.test.ts +16 -10
- package/tests/tree-construction-math.test.ts +11 -9
- package/tests/tree-construction-namespace-sensitivity.test.ts +11 -9
- package/tests/tree-construction-noscript01.test.ts +11 -9
- package/tests/tree-construction-ruby.test.ts +6 -4
- package/tests/tree-construction-scriptdata01.test.ts +6 -4
- package/tests/tree-construction-svg.test.ts +6 -4
- package/tests/tree-construction-template.test.ts +6 -4
- package/tests/tree-construction-tests10.test.ts +6 -4
- package/tests/tree-construction-tests11.test.ts +6 -4
- package/tests/tree-construction-tests20.test.ts +7 -4
- package/tests/tree-construction-tests21.test.ts +7 -4
- package/tests/tree-construction-tests23.test.ts +7 -4
- package/tests/tree-construction-tests24.test.ts +7 -4
- package/tests/tree-construction-tests5.test.ts +6 -5
- package/tests/tree-construction-tests6.test.ts +6 -5
- package/tests/tree-construction-tests_innerHTML_1.test.ts +6 -5
- package/tests/void-elements.test.ts +85 -40
- package/tsconfig.json +1 -1
- package/src/css-selector.ts +0 -185
- package/src/encoding.ts +0 -39
- package/src/parser.ts +0 -682
- package/src/serializer.ts +0 -450
- package/src/tokenizer.ts +0 -325
- package/tests/selectors.test.ts +0 -128
|
@@ -3,10 +3,6 @@ import { parseHTML } from "../index";
|
|
|
3
3
|
|
|
4
4
|
describe("outerHTML replacement - Browser behavior", () => {
|
|
5
5
|
it("should replace element with its innerHTML when setting outerHTML = innerHTML", () => {
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
6
|
const doc = parseHTML(`
|
|
11
7
|
<html>
|
|
12
8
|
<body>
|
|
@@ -16,35 +12,29 @@ describe("outerHTML replacement - Browser behavior", () => {
|
|
|
16
12
|
</body>
|
|
17
13
|
</html>
|
|
18
14
|
`);
|
|
19
|
-
|
|
15
|
+
|
|
20
16
|
const elem = doc.querySelector("#mi-prueba");
|
|
21
17
|
expect(elem).not.toBeNull();
|
|
22
|
-
|
|
23
|
-
|
|
18
|
+
|
|
24
19
|
const innerHTML = elem!.innerHTML;
|
|
25
20
|
expect(innerHTML).toContain("<strong>Lorem ipsum!</strong>");
|
|
26
21
|
expect(innerHTML).toContain("Dolor sit amet consectetur.");
|
|
27
|
-
|
|
28
|
-
|
|
22
|
+
|
|
29
23
|
const parent = elem!.parentNode;
|
|
30
24
|
expect(parent).not.toBeNull();
|
|
31
25
|
expect(parent!.childNodes).toContain(elem);
|
|
32
|
-
|
|
33
|
-
|
|
26
|
+
|
|
34
27
|
elem!.outerHTML = innerHTML;
|
|
35
|
-
|
|
36
|
-
|
|
28
|
+
|
|
37
29
|
const elemAfter = doc.querySelector("#mi-prueba");
|
|
38
30
|
expect(elemAfter).toBeNull();
|
|
39
|
-
|
|
40
|
-
|
|
31
|
+
|
|
41
32
|
const body = doc.querySelector("body");
|
|
42
33
|
expect(body!.innerHTML).toContain("<strong>Lorem ipsum!</strong>");
|
|
43
34
|
expect(body!.innerHTML).toContain("Dolor sit amet consectetur.");
|
|
44
|
-
|
|
45
|
-
|
|
35
|
+
|
|
46
36
|
expect(body!.innerHTML).not.toContain('id="mi-prueba"');
|
|
47
|
-
expect(body!.innerHTML).not.toContain(
|
|
37
|
+
expect(body!.innerHTML).not.toContain("style=");
|
|
48
38
|
});
|
|
49
39
|
|
|
50
40
|
it("should replace element with simple text content", () => {
|
|
@@ -53,20 +43,17 @@ describe("outerHTML replacement - Browser behavior", () => {
|
|
|
53
43
|
<p id="paragraph" class="styled">Simple text</p>
|
|
54
44
|
</div>
|
|
55
45
|
`);
|
|
56
|
-
|
|
46
|
+
|
|
57
47
|
const paragraph = doc.querySelector("#paragraph");
|
|
58
48
|
expect(paragraph).not.toBeNull();
|
|
59
|
-
|
|
49
|
+
|
|
60
50
|
const parent = paragraph!.parentNode;
|
|
61
51
|
const innerHTML = paragraph!.innerHTML;
|
|
62
|
-
|
|
63
|
-
|
|
52
|
+
|
|
64
53
|
paragraph!.outerHTML = innerHTML;
|
|
65
|
-
|
|
66
|
-
|
|
54
|
+
|
|
67
55
|
expect(doc.querySelector("#paragraph")).toBeNull();
|
|
68
|
-
|
|
69
|
-
|
|
56
|
+
|
|
70
57
|
expect(parent!.textContent).toContain("Simple text");
|
|
71
58
|
});
|
|
72
59
|
|
|
@@ -79,20 +66,17 @@ describe("outerHTML replacement - Browser behavior", () => {
|
|
|
79
66
|
</li>
|
|
80
67
|
</ul>
|
|
81
68
|
`);
|
|
82
|
-
|
|
69
|
+
|
|
83
70
|
const container = doc.querySelector("#item-container");
|
|
84
71
|
expect(container).not.toBeNull();
|
|
85
|
-
|
|
72
|
+
|
|
86
73
|
const ul = doc.querySelector("ul");
|
|
87
74
|
const innerHTML = container!.innerHTML;
|
|
88
|
-
|
|
89
|
-
|
|
75
|
+
|
|
90
76
|
container!.outerHTML = innerHTML;
|
|
91
|
-
|
|
92
|
-
|
|
77
|
+
|
|
93
78
|
expect(doc.querySelector("#item-container")).toBeNull();
|
|
94
|
-
|
|
95
|
-
|
|
79
|
+
|
|
96
80
|
const spans = ul!.querySelectorAll("span");
|
|
97
81
|
expect(spans.length).toBe(2);
|
|
98
82
|
expect(spans[0]?.textContent).toBe("Item 1");
|
|
@@ -105,20 +89,17 @@ describe("outerHTML replacement - Browser behavior", () => {
|
|
|
105
89
|
<span id="to-remove"></span>
|
|
106
90
|
</div>
|
|
107
91
|
`);
|
|
108
|
-
|
|
92
|
+
|
|
109
93
|
const span = doc.querySelector("#to-remove");
|
|
110
94
|
expect(span).not.toBeNull();
|
|
111
|
-
|
|
95
|
+
|
|
112
96
|
const parent = span!.parentNode;
|
|
113
97
|
const childCountBefore = parent!.childNodes.length;
|
|
114
|
-
|
|
115
|
-
|
|
98
|
+
|
|
116
99
|
span!.outerHTML = "";
|
|
117
|
-
|
|
118
|
-
|
|
100
|
+
|
|
119
101
|
expect(doc.querySelector("#to-remove")).toBeNull();
|
|
120
|
-
|
|
121
|
-
|
|
102
|
+
|
|
122
103
|
expect(parent!.childNodes.length).toBe(childCountBefore - 1);
|
|
123
104
|
});
|
|
124
105
|
|
|
@@ -128,19 +109,16 @@ describe("outerHTML replacement - Browser behavior", () => {
|
|
|
128
109
|
<p id="old">Old content</p>
|
|
129
110
|
</div>
|
|
130
111
|
`);
|
|
131
|
-
|
|
112
|
+
|
|
132
113
|
const oldParagraph = doc.querySelector("#old");
|
|
133
114
|
expect(oldParagraph).not.toBeNull();
|
|
134
|
-
|
|
115
|
+
|
|
135
116
|
const parent = oldParagraph!.parentNode;
|
|
136
|
-
|
|
137
|
-
|
|
117
|
+
|
|
138
118
|
oldParagraph!.outerHTML = '<div id="new">New content</div>';
|
|
139
|
-
|
|
140
|
-
|
|
119
|
+
|
|
141
120
|
expect(doc.querySelector("#old")).toBeNull();
|
|
142
|
-
|
|
143
|
-
|
|
121
|
+
|
|
144
122
|
const newDiv = doc.querySelector("#new");
|
|
145
123
|
expect(newDiv).not.toBeNull();
|
|
146
124
|
expect(newDiv!.textContent).toBe("New content");
|
|
@@ -155,19 +133,16 @@ describe("outerHTML replacement - Browser behavior", () => {
|
|
|
155
133
|
<span>Last</span>
|
|
156
134
|
</div>
|
|
157
135
|
`);
|
|
158
|
-
|
|
136
|
+
|
|
159
137
|
const middle = doc.querySelector("#middle");
|
|
160
138
|
const firstSpan = doc.querySelectorAll("span")[0];
|
|
161
139
|
const lastSpan = doc.querySelectorAll("span")[1];
|
|
162
|
-
|
|
163
|
-
|
|
140
|
+
|
|
164
141
|
middle!.outerHTML = middle!.innerHTML;
|
|
165
|
-
|
|
166
|
-
|
|
142
|
+
|
|
167
143
|
expect(firstSpan!.nextSibling).not.toBe(middle);
|
|
168
144
|
expect(lastSpan!.previousSibling).not.toBe(middle);
|
|
169
|
-
|
|
170
|
-
|
|
145
|
+
|
|
171
146
|
const parent = firstSpan!.parentNode;
|
|
172
147
|
expect(parent!.textContent).toContain("Middle");
|
|
173
148
|
});
|
|
@@ -185,21 +160,18 @@ describe("outerHTML replacement - Browser behavior", () => {
|
|
|
185
160
|
</section>
|
|
186
161
|
</article>
|
|
187
162
|
`);
|
|
188
|
-
|
|
163
|
+
|
|
189
164
|
const wrapper = doc.querySelector("#wrapper");
|
|
190
165
|
expect(wrapper).not.toBeNull();
|
|
191
|
-
|
|
166
|
+
|
|
192
167
|
const article = doc.querySelector("article");
|
|
193
168
|
const innerHTML = wrapper!.innerHTML;
|
|
194
|
-
|
|
195
|
-
|
|
169
|
+
|
|
196
170
|
wrapper!.outerHTML = innerHTML;
|
|
197
|
-
|
|
198
|
-
|
|
171
|
+
|
|
199
172
|
expect(doc.querySelector("#wrapper")).toBeNull();
|
|
200
173
|
expect(doc.querySelector("section")).toBeNull();
|
|
201
|
-
|
|
202
|
-
|
|
174
|
+
|
|
203
175
|
expect(article!.querySelector("h2")).not.toBeNull();
|
|
204
176
|
expect(article!.querySelector("h2")!.textContent).toBe("Title");
|
|
205
177
|
expect(article!.querySelector("strong")).not.toBeNull();
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import { domToAST } from "../../src/parser/index";
|
|
2
|
+
import {
|
|
3
|
+
createDocument,
|
|
4
|
+
createElement,
|
|
5
|
+
createTextNode,
|
|
6
|
+
createComment,
|
|
7
|
+
createDoctype,
|
|
8
|
+
} from "../../src/dom-simulator/index.js";
|
|
9
|
+
|
|
10
|
+
it("should convert document to AST", () => {
|
|
11
|
+
const doc = createDocument();
|
|
12
|
+
const ast = domToAST(doc);
|
|
13
|
+
expect(ast.type).toBe("document");
|
|
14
|
+
expect(ast.children).toEqual([]);
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
it("should convert element to AST", () => {
|
|
18
|
+
const element = createElement("div", { class: "test", id: "mydiv" });
|
|
19
|
+
const ast = domToAST(element);
|
|
20
|
+
expect(ast.type).toBe("element");
|
|
21
|
+
expect(ast.tagName).toBe("div");
|
|
22
|
+
expect(ast.attributes).toEqual({ class: "test", id: "mydiv" });
|
|
23
|
+
expect(ast.children).toEqual([]);
|
|
24
|
+
});
|
|
25
|
+
|
|
26
|
+
it("should convert element with children to AST", () => {
|
|
27
|
+
const element = createElement("div", {});
|
|
28
|
+
const text = createTextNode("Hello");
|
|
29
|
+
element.appendChild(text);
|
|
30
|
+
const ast = domToAST(element);
|
|
31
|
+
expect(ast.type).toBe("element");
|
|
32
|
+
expect(ast.tagName).toBe("div");
|
|
33
|
+
expect(ast.children).toHaveLength(1);
|
|
34
|
+
expect(ast.children[0].type).toBe("text");
|
|
35
|
+
expect(ast.children[0].content).toBe("Hello");
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
it("should convert text node to AST", () => {
|
|
39
|
+
const text = createTextNode("Hello world");
|
|
40
|
+
const ast = domToAST(text);
|
|
41
|
+
expect(ast.type).toBe("text");
|
|
42
|
+
expect(ast.content).toBe("Hello world");
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
it("should convert comment to AST", () => {
|
|
46
|
+
const comment = createComment("This is a comment");
|
|
47
|
+
const ast = domToAST(comment);
|
|
48
|
+
expect(ast.type).toBe("comment");
|
|
49
|
+
expect(ast.content).toBe("This is a comment");
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
it("should convert doctype to AST", () => {
|
|
53
|
+
const doctype = createDoctype("html");
|
|
54
|
+
const ast = domToAST(doctype);
|
|
55
|
+
expect(ast.type).toBe("doctype");
|
|
56
|
+
expect(ast.name).toBe("html");
|
|
57
|
+
expect(ast.publicId).toBe("");
|
|
58
|
+
expect(ast.systemId).toBe("");
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
it("should convert doctype with ids to AST", () => {
|
|
62
|
+
const doctype = createDoctype("html", "public", "system");
|
|
63
|
+
const ast = domToAST(doctype);
|
|
64
|
+
expect(ast.type).toBe("doctype");
|
|
65
|
+
expect(ast.name).toBe("html");
|
|
66
|
+
expect(ast.publicId).toBe("public");
|
|
67
|
+
expect(ast.systemId).toBe("system");
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
it("should handle null input", () => {
|
|
71
|
+
const ast = domToAST(null);
|
|
72
|
+
expect(ast.type).toBe("document");
|
|
73
|
+
expect(ast.children).toEqual([]);
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
it("should convert nested elements to AST", () => {
|
|
77
|
+
const div = createElement("div", { class: "container" });
|
|
78
|
+
const span = createElement("span", { class: "inner" });
|
|
79
|
+
const text = createTextNode("content");
|
|
80
|
+
span.appendChild(text);
|
|
81
|
+
div.appendChild(span);
|
|
82
|
+
const ast = domToAST(div);
|
|
83
|
+
expect(ast.type).toBe("element");
|
|
84
|
+
expect(ast.tagName).toBe("div");
|
|
85
|
+
expect(ast.attributes).toEqual({ class: "container" });
|
|
86
|
+
expect(ast.children).toHaveLength(1);
|
|
87
|
+
const childAst = ast.children[0];
|
|
88
|
+
expect(childAst.type).toBe("element");
|
|
89
|
+
expect(childAst.tagName).toBe("span");
|
|
90
|
+
expect(childAst.attributes).toEqual({ class: "inner" });
|
|
91
|
+
expect(childAst.children).toHaveLength(1);
|
|
92
|
+
expect(childAst.children[0].type).toBe("text");
|
|
93
|
+
expect(childAst.children[0].content).toBe("content");
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
it("should convert element with namespace to AST", () => {
|
|
97
|
+
const element = createElement("svg", {}, "http://www.w3.org/2000/svg");
|
|
98
|
+
const ast = domToAST(element);
|
|
99
|
+
expect(ast.type).toBe("element");
|
|
100
|
+
expect(ast.tagName).toBe("svg");
|
|
101
|
+
expect(ast.namespaceURI).toBe("http://www.w3.org/2000/svg");
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
it("should handle unknown node types", () => {
|
|
105
|
+
const unknownNode = { nodeType: 999 };
|
|
106
|
+
const ast = domToAST(unknownNode);
|
|
107
|
+
expect(ast.type).toBe("text");
|
|
108
|
+
expect(ast.content).toBe("");
|
|
109
|
+
});
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import { parse } from "../../src/parser/index";
|
|
2
|
+
import { tokenize } from "../../src/tokenizer/index";
|
|
3
|
+
|
|
4
|
+
it("should parse empty HTML", () => {
|
|
5
|
+
const tokens = tokenize("");
|
|
6
|
+
const result = parse(tokens);
|
|
7
|
+
expect(result.nodeType).toBe(9);
|
|
8
|
+
expect(result.childNodes).toHaveLength(1);
|
|
9
|
+
expect(result.documentElement.tagName).toBe("HTML");
|
|
10
|
+
});
|
|
11
|
+
|
|
12
|
+
it("should parse simple HTML", () => {
|
|
13
|
+
const tokens = tokenize("<html><body>Hello</body></html>");
|
|
14
|
+
const result = parse(tokens);
|
|
15
|
+
expect(result.nodeType).toBe(9);
|
|
16
|
+
expect(result.documentElement.tagName).toBe("HTML");
|
|
17
|
+
expect(result.body.tagName).toBe("BODY");
|
|
18
|
+
expect(result.body.childNodes[0].textContent).toBe("Hello");
|
|
19
|
+
});
|
|
20
|
+
|
|
21
|
+
it("should parse HTML with doctype", () => {
|
|
22
|
+
const tokens = tokenize("<!DOCTYPE html><html></html>");
|
|
23
|
+
const result = parse(tokens);
|
|
24
|
+
expect(result.doctype.name).toBe("html");
|
|
25
|
+
expect(result.documentElement.tagName).toBe("HTML");
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
it("should parse HTML with comments", () => {
|
|
29
|
+
const tokens = tokenize("<!-- comment --><html></html>");
|
|
30
|
+
const result = parse(tokens);
|
|
31
|
+
expect(result.childNodes[0].nodeType).toBe(8);
|
|
32
|
+
expect(result.childNodes[0].textContent).toBe(" comment ");
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
it("should parse HTML with attributes", () => {
|
|
36
|
+
const tokens = tokenize('<div class="test" id="mydiv">content</div>');
|
|
37
|
+
const result = parse(tokens);
|
|
38
|
+
const div = result.body.childNodes[0];
|
|
39
|
+
expect(div.tagName).toBe("DIV");
|
|
40
|
+
expect(div.attributes.class).toBe("test");
|
|
41
|
+
expect(div.attributes.id).toBe("mydiv");
|
|
42
|
+
expect(div.childNodes[0].textContent).toBe("content");
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
it("should parse self-closing tags", () => {
|
|
46
|
+
const tokens = tokenize("<img src='test.jpg' />");
|
|
47
|
+
const result = parse(tokens);
|
|
48
|
+
const img = result.body.childNodes[0];
|
|
49
|
+
expect(img.tagName).toBe("IMG");
|
|
50
|
+
expect(img.attributes.src).toBe("test.jpg");
|
|
51
|
+
expect(img.childNodes).toHaveLength(0);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
it("should parse void elements", () => {
|
|
55
|
+
const tokens = tokenize("<br><hr><input>");
|
|
56
|
+
const result = parse(tokens);
|
|
57
|
+
expect(result.body.childNodes).toHaveLength(3);
|
|
58
|
+
expect(result.body.childNodes[0].tagName).toBe("BR");
|
|
59
|
+
expect(result.body.childNodes[1].tagName).toBe("HR");
|
|
60
|
+
expect(result.body.childNodes[2].tagName).toBe("INPUT");
|
|
61
|
+
});
|
|
62
|
+
|
|
63
|
+
it("should handle unclosed tags", () => {
|
|
64
|
+
const tokens = tokenize("<div><p>Hello");
|
|
65
|
+
const result = parse(tokens);
|
|
66
|
+
expect(result.body.childNodes[0].tagName).toBe("DIV");
|
|
67
|
+
expect(result.body.childNodes[0].childNodes[0].tagName).toBe("P");
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
it("should parse nested elements", () => {
|
|
71
|
+
const tokens = tokenize("<div><span>inner</span></div>");
|
|
72
|
+
const result = parse(tokens);
|
|
73
|
+
const div = result.body.childNodes[0];
|
|
74
|
+
expect(div.tagName).toBe("DIV");
|
|
75
|
+
const span = div.childNodes[0];
|
|
76
|
+
expect(span.tagName).toBe("SPAN");
|
|
77
|
+
expect(span.childNodes[0].textContent).toBe("inner");
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
it("should parse multiple root elements", () => {
|
|
81
|
+
const tokens = tokenize("<div>first</div><div>second</div>");
|
|
82
|
+
const result = parse(tokens);
|
|
83
|
+
expect(result.body.childNodes).toHaveLength(2);
|
|
84
|
+
expect(result.body.childNodes[0].childNodes[0].textContent).toBe("first");
|
|
85
|
+
expect(result.body.childNodes[1].childNodes[0].textContent).toBe("second");
|
|
86
|
+
});
|
|
87
|
+
|
|
88
|
+
it("should handle malformed HTML", () => {
|
|
89
|
+
const tokens = tokenize("<div><p>Hello</div>");
|
|
90
|
+
const result = parse(tokens);
|
|
91
|
+
expect(result.body.childNodes[0].tagName).toBe("DIV");
|
|
92
|
+
expect(result.body.childNodes[0].childNodes[0].tagName).toBe("P");
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
it("should parse HTML with head and body", () => {
|
|
96
|
+
const tokens = tokenize(
|
|
97
|
+
"<html><head><title>Test</title></head><body>Content</body></html>",
|
|
98
|
+
);
|
|
99
|
+
const result = parse(tokens);
|
|
100
|
+
expect(result.head.tagName).toBe("HEAD");
|
|
101
|
+
expect(result.head.childNodes[0].tagName).toBe("TITLE");
|
|
102
|
+
expect(result.body.tagName).toBe("BODY");
|
|
103
|
+
expect(result.body.childNodes[0].textContent).toBe("Content");
|
|
104
|
+
});
|
|
105
|
+
|
|
106
|
+
it("should parse HTML without explicit html tag", () => {
|
|
107
|
+
const tokens = tokenize(
|
|
108
|
+
"<head><title>Test</title></head><body>Content</body>",
|
|
109
|
+
);
|
|
110
|
+
const result = parse(tokens);
|
|
111
|
+
expect(result.documentElement.tagName).toBe("HTML");
|
|
112
|
+
expect(result.head.tagName).toBe("HEAD");
|
|
113
|
+
expect(result.body.tagName).toBe("BODY");
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
it("should parse text content", () => {
|
|
117
|
+
const tokens = tokenize("Hello <b>world</b>!");
|
|
118
|
+
const result = parse(tokens);
|
|
119
|
+
expect(result.body.childNodes).toHaveLength(3);
|
|
120
|
+
expect(result.body.childNodes[0].textContent).toBe("Hello ");
|
|
121
|
+
expect(result.body.childNodes[1].tagName).toBe("B");
|
|
122
|
+
expect(result.body.childNodes[2].textContent).toBe("!");
|
|
123
|
+
});
|
|
124
|
+
|
|
125
|
+
it("should handle whitespace", () => {
|
|
126
|
+
const tokens = tokenize(" <div> content </div> ");
|
|
127
|
+
const result = parse(tokens);
|
|
128
|
+
const div = result.body.childNodes[0];
|
|
129
|
+
expect(div.tagName).toBe("DIV");
|
|
130
|
+
expect(div.childNodes[0].textContent).toBe(" content ");
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
it("should parse custom elements", () => {
|
|
134
|
+
const tokens = tokenize("<my-element>content</my-element>");
|
|
135
|
+
const result = parse(tokens);
|
|
136
|
+
const element = result.body?.childNodes[0] as any;
|
|
137
|
+
expect(element?.tagName).toBe("MY-ELEMENT");
|
|
138
|
+
expect(element?.childNodes[0]?.textContent).toBe("content");
|
|
139
|
+
});
|