@tkeron/html-parser 0.1.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/README.md +1 -7
  2. package/bun.lock +8 -3
  3. package/index.ts +4 -0
  4. package/package.json +13 -6
  5. package/src/css-selector.ts +45 -27
  6. package/src/dom-simulator.ts +162 -20
  7. package/src/encoding.ts +39 -0
  8. package/src/index.ts +9 -0
  9. package/src/parser.ts +478 -183
  10. package/src/serializer.ts +450 -0
  11. package/src/tokenizer.ts +59 -139
  12. package/tests/advanced.test.ts +119 -106
  13. package/tests/custom-elements.test.ts +172 -162
  14. package/tests/dom-extended.test.ts +12 -12
  15. package/tests/dom-manipulation.test.ts +637 -0
  16. package/tests/dom.test.ts +32 -27
  17. package/tests/helpers/tokenizer-adapter.test.ts +70 -0
  18. package/tests/helpers/tokenizer-adapter.ts +65 -0
  19. package/tests/helpers/tree-adapter.test.ts +39 -0
  20. package/tests/helpers/tree-adapter.ts +43 -0
  21. package/tests/html5lib-data/tokenizer/namedEntities.test +42422 -0
  22. package/tests/html5lib-data/tokenizer/pendingSpecChanges.test +9 -0
  23. package/tests/html5lib-data/tree-construction/adoption01.dat +354 -0
  24. package/tests/html5lib-data/tree-construction/adoption02.dat +39 -0
  25. package/tests/html5lib-data/tree-construction/domjs-unsafe.dat +0 -0
  26. package/tests/html5lib-data/tree-construction/entities02.dat +309 -0
  27. package/tests/html5lib-data/tree-construction/html5test-com.dat +301 -0
  28. package/tests/html5lib-data/tree-construction/math.dat +104 -0
  29. package/tests/html5lib-data/tree-construction/namespace-sensitivity.dat +22 -0
  30. package/tests/html5lib-data/tree-construction/noscript01.dat +237 -0
  31. package/tests/html5lib-data/tree-construction/ruby.dat +302 -0
  32. package/tests/html5lib-data/tree-construction/scriptdata01.dat +372 -0
  33. package/tests/html5lib-data/tree-construction/svg.dat +104 -0
  34. package/tests/html5lib-data/tree-construction/template.dat +1673 -0
  35. package/tests/html5lib-data/tree-construction/tests10.dat +853 -0
  36. package/tests/html5lib-data/tree-construction/tests11.dat +523 -0
  37. package/tests/html5lib-data/tree-construction/tests20.dat +842 -0
  38. package/tests/html5lib-data/tree-construction/tests21.dat +306 -0
  39. package/tests/html5lib-data/tree-construction/tests23.dat +168 -0
  40. package/tests/html5lib-data/tree-construction/tests24.dat +79 -0
  41. package/tests/html5lib-data/tree-construction/tests5.dat +210 -0
  42. package/tests/html5lib-data/tree-construction/tests6.dat +663 -0
  43. package/tests/html5lib-data/tree-construction/tests_innerHTML_1.dat +844 -0
  44. package/tests/parser.test.ts +172 -193
  45. package/tests/selectors.test.ts +64 -1
  46. package/tests/serializer-core.test.ts +16 -0
  47. package/tests/serializer-data/core.test +125 -0
  48. package/tests/serializer-data/injectmeta.test +66 -0
  49. package/tests/serializer-data/optionaltags.test +965 -0
  50. package/tests/serializer-data/options.test +60 -0
  51. package/tests/serializer-data/whitespace.test +51 -0
  52. package/tests/serializer-injectmeta.test.ts +16 -0
  53. package/tests/serializer-optionaltags.test.ts +16 -0
  54. package/tests/serializer-options.test.ts +16 -0
  55. package/tests/serializer-whitespace.test.ts +16 -0
  56. package/tests/tokenizer-namedEntities.test.ts +20 -0
  57. package/tests/tokenizer-pendingSpecChanges.test.ts +20 -0
  58. package/tests/tokenizer.test.ts +83 -0
  59. package/tests/tree-construction-adoption01.test.ts +37 -0
  60. package/tests/tree-construction-adoption02.test.ts +34 -0
  61. package/tests/tree-construction-domjs-unsafe.test.ts +24 -0
  62. package/tests/tree-construction-entities02.test.ts +33 -0
  63. package/tests/tree-construction-html5test-com.test.ts +24 -0
  64. package/tests/tree-construction-math.test.ts +18 -0
  65. package/tests/tree-construction-namespace-sensitivity.test.ts +18 -0
  66. package/tests/tree-construction-noscript01.test.ts +18 -0
  67. package/tests/tree-construction-ruby.test.ts +21 -0
  68. package/tests/tree-construction-scriptdata01.test.ts +21 -0
  69. package/tests/tree-construction-svg.test.ts +21 -0
  70. package/tests/tree-construction-template.test.ts +21 -0
  71. package/tests/tree-construction-tests10.test.ts +21 -0
  72. package/tests/tree-construction-tests11.test.ts +21 -0
  73. package/tests/tree-construction-tests20.test.ts +18 -0
  74. package/tests/tree-construction-tests21.test.ts +18 -0
  75. package/tests/tree-construction-tests23.test.ts +18 -0
  76. package/tests/tree-construction-tests24.test.ts +18 -0
  77. package/tests/tree-construction-tests5.test.ts +21 -0
  78. package/tests/tree-construction-tests6.test.ts +21 -0
  79. package/tests/tree-construction-tests_innerHTML_1.test.ts +21 -0
  80. package/tests/void-elements.test.ts +471 -0
  81. package/tests/official/README.md +0 -87
  82. package/tests/official/acid/acid-tests.test.ts +0 -309
  83. package/tests/official/final-output/final-output.test.ts +0 -361
  84. package/tests/official/html5lib/tokenizer-utils.ts +0 -192
  85. package/tests/official/html5lib/tokenizer.test.ts +0 -171
  86. package/tests/official/html5lib/tree-construction-utils.ts +0 -194
  87. package/tests/official/html5lib/tree-construction.test.ts +0 -250
  88. package/tests/official/validator/validator-tests.test.ts +0 -237
  89. package/tests/official/validator-nu/validator-nu.test.ts +0 -335
  90. package/tests/official/whatwg/whatwg-tests.test.ts +0 -205
  91. package/tests/official/wpt/wpt-tests.test.ts +0 -409
@@ -0,0 +1,21 @@
1
+ import { describe, it, expect } from "bun:test";
2
+ import { readFileSync } from "fs";
3
+ import { parse } from "../src/index.ts";
4
+
5
/**
 * Smoke-tests the parser against every case in html5lib's `tests11.dat`.
 *
 * A case in an html5lib tree-construction file is laid out as
 * `#data` / `#errors` / (optional extra headers) / `#document`, so the markup
 * to parse is only the text between `#data` and the next header line. Splitting
 * on `#document` alone would leave the `#errors` section glued to the input.
 */
describe("Tree Construction Tests11 Tests", () => {
  const content = readFileSync("tests/html5lib-data/tree-construction/tests11.dat", "utf8");
  // Every case starts with a `#data` line; the chunk before the first one is dropped.
  const sections = content.split(/^#data$/gm).slice(1);
  // Header lines that may follow the input inside a single case.
  const nextHeader = /^#(?:errors|new-errors|document-fragment|script-on|script-off|document)$/m;
  const documentHeader = /^#document$/m;

  for (const section of sections) {
    const headerAt = section.search(nextHeader);
    const input = (headerAt === -1 ? section : section.slice(0, headerAt)).trim();

    // Expected tree dump; empty when the case has no `#document` section
    // (previously this path threw while the suite was being collected).
    const documentAt = section.search(documentHeader);
    const expected =
      documentAt === -1 ? "" : section.slice(documentAt + "#document".length).trim();

    it(`Tests11 test: ${input.slice(0, 50)}${input.length > 50 ? "..." : ""}`, () => {
      const doc = parse(input);
      expect(doc).toBeDefined();
      // TODO: Implement DOM serialization and comparison
      // expect(serialize(doc)).toBe(expected);
    });
  }
});
@@ -0,0 +1,18 @@
1
+ import { readFileSync } from "fs";
2
+ import { parse } from "../src/index.ts";
3
+
4
/**
 * Registers one (currently skipped) smoke test per case in html5lib's
 * `tests20.dat`.
 *
 * In the .dat layout the `#errors` section sits between the input and
 * `#document`, so the markup is cut at the first header line after `#data`
 * rather than at `#document`.
 */
describe("Tree Construction Tests20 Tests", () => {
  const data = readFileSync("tests/html5lib-data/tree-construction/tests20.dat", "utf8");
  // The chunk before the first `#data` marker carries no case and is dropped.
  const tests = data.split("#data\n").slice(1);
  // Header lines that may follow the input inside a single case.
  const nextHeader = /^#(?:errors|new-errors|document-fragment|script-on|script-off|document)$/m;

  for (const test of tests) {
    const headerAt = test.search(nextHeader);
    const html = (headerAt === -1 ? test : test.slice(0, headerAt)).trim();
    // First line of the markup names the test; empty input falls back to a fixed title.
    const title = html.split("\n")[0] || "Unnamed test";

    it.skip(title, () => {
      const doc = parse(html);
      expect(doc).toBeDefined();
    });
  }
});
@@ -0,0 +1,18 @@
1
+ import { readFileSync } from "fs";
2
+ import { parse } from "../src/index.ts";
3
+
4
/**
 * Registers one (currently skipped) smoke test per case in html5lib's
 * `tests21.dat`.
 *
 * In the .dat layout the `#errors` section sits between the input and
 * `#document`, so the markup is cut at the first header line after `#data`
 * rather than at `#document`.
 */
describe("Tree Construction Tests21 Tests", () => {
  const data = readFileSync("tests/html5lib-data/tree-construction/tests21.dat", "utf8");
  // The chunk before the first `#data` marker carries no case and is dropped.
  const tests = data.split("#data\n").slice(1);
  // Header lines that may follow the input inside a single case.
  const nextHeader = /^#(?:errors|new-errors|document-fragment|script-on|script-off|document)$/m;

  for (const test of tests) {
    const headerAt = test.search(nextHeader);
    const html = (headerAt === -1 ? test : test.slice(0, headerAt)).trim();
    // First line of the markup names the test; empty input falls back to a fixed title.
    const title = html.split("\n")[0] || "Unnamed test";

    it.skip(title, () => {
      const doc = parse(html);
      expect(doc).toBeDefined();
    });
  }
});
@@ -0,0 +1,18 @@
1
+ import { readFileSync } from "fs";
2
+ import { parse } from "../src/index.ts";
3
+
4
/**
 * Registers one (currently skipped) smoke test per case in html5lib's
 * `tests23.dat`.
 *
 * In the .dat layout the `#errors` section sits between the input and
 * `#document`, so the markup is cut at the first header line after `#data`
 * rather than at `#document`.
 */
describe("Tree Construction Tests23 Tests", () => {
  const data = readFileSync("tests/html5lib-data/tree-construction/tests23.dat", "utf8");
  // The chunk before the first `#data` marker carries no case and is dropped.
  const tests = data.split("#data\n").slice(1);
  // Header lines that may follow the input inside a single case.
  const nextHeader = /^#(?:errors|new-errors|document-fragment|script-on|script-off|document)$/m;

  for (const test of tests) {
    const headerAt = test.search(nextHeader);
    const html = (headerAt === -1 ? test : test.slice(0, headerAt)).trim();
    // First line of the markup names the test; empty input falls back to a fixed title.
    const title = html.split("\n")[0] || "Unnamed test";

    it.skip(title, () => {
      const doc = parse(html);
      expect(doc).toBeDefined();
    });
  }
});
@@ -0,0 +1,18 @@
1
+ import { readFileSync } from "fs";
2
+ import { parse } from "../src/index.ts";
3
+
4
/**
 * Registers one (currently skipped) smoke test per case in html5lib's
 * `tests24.dat`.
 *
 * In the .dat layout the `#errors` section sits between the input and
 * `#document`, so the markup is cut at the first header line after `#data`
 * rather than at `#document`.
 */
describe("Tree Construction Tests24 Tests", () => {
  const data = readFileSync("tests/html5lib-data/tree-construction/tests24.dat", "utf8");
  // The chunk before the first `#data` marker carries no case and is dropped.
  const tests = data.split("#data\n").slice(1);
  // Header lines that may follow the input inside a single case.
  const nextHeader = /^#(?:errors|new-errors|document-fragment|script-on|script-off|document)$/m;

  for (const test of tests) {
    const headerAt = test.search(nextHeader);
    const html = (headerAt === -1 ? test : test.slice(0, headerAt)).trim();
    // First line of the markup names the test; empty input falls back to a fixed title.
    const title = html.split("\n")[0] || "Unnamed test";

    it.skip(title, () => {
      const doc = parse(html);
      expect(doc).toBeDefined();
    });
  }
});
@@ -0,0 +1,21 @@
1
+ import { readFileSync } from "fs";
2
+ import { parse } from "../src/index.ts";
3
+
4
/**
 * Smoke-tests the parser against every case in html5lib's `tests5.dat`.
 *
 * Within a case the `#errors` section comes before `#document`, so the markup
 * handed to the parser is cut at the first header line after `#data`; cutting
 * at `#document` would feed the error list to the parser as if it were HTML.
 */
describe("Tree Construction Tests5 Tests", () => {
  const content = readFileSync("tests/html5lib-data/tree-construction/tests5.dat", "utf8");
  const sections = content.split("#data\n");
  // Header lines that may follow the input inside a single case.
  const nextHeader = /^#(?:errors|new-errors|document-fragment|script-on|script-off|document)$/m;

  for (const [i, section] of sections.entries()) {
    // Index 0 is whatever precedes the first `#data` marker, not a test case;
    // skipping it keeps the 1-based numbering used in the test titles.
    if (i === 0) continue;

    const headerAt = section.search(nextHeader);
    const data = (headerAt === -1 ? section : section.slice(0, headerAt)).trim();

    it(`Tests5 test ${i}`, () => {
      const doc = parse(data);
      expect(doc).toBeDefined();
      // TODO: Implement DOM serialization and comparison
      // (the expected tree is the text after the case's `#document` line).
    });
  }
});
@@ -0,0 +1,21 @@
1
+ import { readFileSync } from "fs";
2
+ import { parse } from "../src/index.ts";
3
+
4
/**
 * Smoke-tests the parser against every case in html5lib's `tests6.dat`.
 *
 * Within a case the `#errors` section comes before `#document`, so the markup
 * handed to the parser is cut at the first header line after `#data`; cutting
 * at `#document` would feed the error list to the parser as if it were HTML.
 */
describe("Tree Construction Tests6 Tests", () => {
  const content = readFileSync("tests/html5lib-data/tree-construction/tests6.dat", "utf8");
  const sections = content.split("#data\n");
  // Header lines that may follow the input inside a single case.
  const nextHeader = /^#(?:errors|new-errors|document-fragment|script-on|script-off|document)$/m;

  for (const [i, section] of sections.entries()) {
    // Index 0 is whatever precedes the first `#data` marker, not a test case;
    // skipping it keeps the 1-based numbering used in the test titles.
    if (i === 0) continue;

    const headerAt = section.search(nextHeader);
    const data = (headerAt === -1 ? section : section.slice(0, headerAt)).trim();

    it(`Tests6 test ${i}`, () => {
      const doc = parse(data);
      expect(doc).toBeDefined();
      // TODO: Implement DOM serialization and comparison
      // (the expected tree is the text after the case's `#document` line).
    });
  }
});
@@ -0,0 +1,21 @@
1
+ import { readFileSync } from "fs";
2
+ import { parse } from "../src/index.ts";
3
+
4
/**
 * Smoke-tests the parser against every case in html5lib's
 * `tests_innerHTML_1.dat`.
 *
 * Within a case the `#errors` section comes before `#document`, so the markup
 * handed to the parser is cut at the first header line after `#data`; cutting
 * at `#document` would feed the error list to the parser as if it were HTML.
 *
 * NOTE(review): cases in this file presumably carry a `#document-fragment`
 * context element; it is ignored here and the input is parsed as a full
 * document — confirm that is intended once tree comparison is implemented.
 */
describe("Tree Construction Tests_innerHTML_1 Tests", () => {
  const content = readFileSync("tests/html5lib-data/tree-construction/tests_innerHTML_1.dat", "utf8");
  const sections = content.split("#data\n");
  // Header lines that may follow the input inside a single case.
  const nextHeader = /^#(?:errors|new-errors|document-fragment|script-on|script-off|document)$/m;

  for (const [i, section] of sections.entries()) {
    // Index 0 is whatever precedes the first `#data` marker, not a test case;
    // skipping it keeps the 1-based numbering used in the test titles.
    if (i === 0) continue;

    const headerAt = section.search(nextHeader);
    const data = (headerAt === -1 ? section : section.slice(0, headerAt)).trim();

    it(`Tests_innerHTML_1 test ${i}`, () => {
      const doc = parse(data);
      expect(doc).toBeDefined();
      // TODO: Implement DOM serialization and comparison
      // (the expected tree is the text after the case's `#document` line).
    });
  }
});
@@ -0,0 +1,471 @@
1
+ import { describe, it, expect } from "bun:test";
2
+ import { parseHTML } from "../index";
3
+
4
+ /**
5
+ * Test suite for HTML void elements serialization
6
+ *
7
+ * Void elements should NOT have closing tags according to HTML spec:
8
+ * https://html.spec.whatwg.org/multipage/syntax.html#void-elements
9
+ *
10
+ * List: area, base, br, col, embed, hr, img, input, link, meta, source, track, wbr
11
+ */
12
+
13
/**
 * Tag names of the HTML void elements exercised by the data-driven tests in
 * this file.
 *
 * Declared `as const` so the list shared by several suites is read-only and
 * cannot be mutated by one test to the detriment of another.
 */
const VOID_ELEMENTS = [
  "area",
  "base",
  "br",
  "col",
  "embed",
  "hr",
  "img",
  "input",
  "link",
  "meta",
  "source",
  "track",
  "wbr",
] as const;
28
+
29
+ describe("Void Elements - outerHTML serialization", () => {
30
describe("Individual void elements without attributes", () => {
  // Each bare tag is parsed inside <body> and must serialize back to exactly `<tag>`.
  for (const tag of ["br", "hr", "wbr"]) {
    it(`should serialize <${tag}> without closing tag`, () => {
      const parsed = parseHTML(`<html><body><${tag}></body></html>`);
      const node = parsed.querySelector(tag);
      expect(node).not.toBeNull();
      expect(node!.outerHTML).toBe(`<${tag}>`);
    });
  }
});
52
+
53
describe("Individual void elements with attributes", () => {
  // One row per test: the tag under test, the wording used in the test title,
  // the document handed to the parser, and the exact outerHTML expected back.
  const cases = [
    {
      tag: "img",
      what: "attributes",
      markup: '<html><body><img src="test.jpg" alt="test image"></body></html>',
      serialized: '<img src="test.jpg" alt="test image">',
    },
    {
      tag: "input",
      what: "type attribute",
      markup: '<html><body><input type="text" name="username"></body></html>',
      serialized: '<input type="text" name="username">',
    },
    {
      tag: "meta",
      what: "attributes",
      markup: '<html><head><meta charset="utf-8"></head><body></body></html>',
      serialized: '<meta charset="utf-8">',
    },
    {
      tag: "link",
      what: "attributes",
      markup: '<html><head><link rel="stylesheet" href="style.css"></head><body></body></html>',
      serialized: '<link rel="stylesheet" href="style.css">',
    },
    {
      tag: "base",
      what: "href",
      markup: '<html><head><base href="https://example.com/"></head><body></body></html>',
      serialized: '<base href="https://example.com/">',
    },
    {
      tag: "col",
      what: "attributes",
      markup: '<html><body><table><colgroup><col span="2" style="background:red"></colgroup></table></body></html>',
      serialized: '<col span="2" style="background:red">',
    },
    {
      tag: "embed",
      what: "attributes",
      markup: '<html><body><embed src="video.swf" type="application/x-shockwave-flash"></body></html>',
      serialized: '<embed src="video.swf" type="application/x-shockwave-flash">',
    },
    {
      tag: "source",
      what: "attributes",
      markup: '<html><body><video><source src="video.mp4" type="video/mp4"></video></body></html>',
      serialized: '<source src="video.mp4" type="video/mp4">',
    },
    {
      tag: "track",
      what: "attributes",
      markup: '<html><body><video><track kind="subtitles" src="subs.vtt" srclang="en"></video></body></html>',
      serialized: '<track kind="subtitles" src="subs.vtt" srclang="en">',
    },
    {
      tag: "area",
      what: "attributes",
      markup: '<html><body><map name="test"><area shape="rect" coords="0,0,100,100" href="link.html"></map></body></html>',
      serialized: '<area shape="rect" coords="0,0,100,100" href="link.html">',
    },
  ];

  for (const { tag, what, markup, serialized } of cases) {
    it(`should serialize <${tag}> with ${what} without closing tag`, () => {
      const node = parseHTML(markup).querySelector(tag);
      expect(node).not.toBeNull();
      expect(node!.outerHTML).toBe(serialized);
    });
  }
});
124
+
125
describe("All void elements - comprehensive test", () => {
  // Data-driven pass over the shared tag list: every entry is parsed inside
  // <body> and must come back as a lone start tag.
  for (const tagName of VOID_ELEMENTS) {
    it(`should serialize <${tagName}> without closing tag`, () => {
      const parsed = parseHTML(`<html><body><${tagName}></body></html>`);
      const node = parsed.querySelector(tagName);
      expect(node).not.toBeNull();

      const serialized = `<${tagName}>`;
      const closer = `</${tagName}>`;
      expect(node!.outerHTML).toBe(serialized);
      expect(node!.outerHTML).not.toContain(closer);
    });
  }
});
136
+
137
describe("Multiple void elements in same document", () => {
  // Fixture used by both tests: three different void elements as siblings.
  // It is parsed afresh inside each test.
  const markup = '<html><body><img src="test.jpg"><br><input type="text"></body></html>';

  it("should serialize multiple void elements correctly", () => {
    const parsed = parseHTML(markup);

    const image = parsed.querySelector("img");
    const lineBreak = parsed.querySelector("br");
    const field = parsed.querySelector("input");

    expect(image!.outerHTML).toBe('<img src="test.jpg">');
    expect(lineBreak!.outerHTML).toBe("<br>");
    expect(field!.outerHTML).toBe('<input type="text">');
  });

  it("should serialize document with multiple void elements without closing tags", () => {
    const serialized = parseHTML(markup).documentElement.outerHTML;

    // No void element may appear with an end tag anywhere in the whole tree.
    for (const closer of ["</img>", "</br>", "</input>"]) {
      expect(serialized).not.toContain(closer);
    }
  });
});
160
+
161
describe("Void elements in head section", () => {
  it("should serialize head void elements without closing tags", () => {
    // Document whose <head> holds two <meta>, one <link> and one <base>.
    const page = parseHTML(`<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width">
<link rel="stylesheet" href="style.css">
<base href="https://example.com/">
</head>
<body></body>
</html>`);

    const metaNodes = page.querySelectorAll("meta");
    const linkNode = page.querySelector("link");
    const baseNode = page.querySelector("base");

    // Every <meta> found, however many there are, must lack an end tag.
    metaNodes.forEach((node: any) => {
      expect(node.outerHTML).not.toContain("</meta>");
    });
    expect(linkNode!.outerHTML).not.toContain("</link>");
    expect(baseNode!.outerHTML).not.toContain("</base>");
  });
});
185
+
186
describe("Void elements created with createElement", () => {
  // Every test builds its element from a fresh, empty document.
  const emptyPage = "<html><body></body></html>";

  // Hand-picked elements: attributes are applied in the listed order before
  // the element is serialized.
  const handPicked: Array<{ tag: string; attrs: Array<[string, string]>; serialized: string }> = [
    { tag: "img", attrs: [["src", "dynamic.jpg"]], serialized: '<img src="dynamic.jpg">' },
    { tag: "br", attrs: [], serialized: "<br>" },
    {
      tag: "input",
      attrs: [
        ["type", "password"],
        ["name", "secret"],
      ],
      serialized: '<input type="password" name="secret">',
    },
    {
      tag: "meta",
      attrs: [
        ["name", "description"],
        ["content", "Test page"],
      ],
      serialized: '<meta name="description" content="Test page">',
    },
    { tag: "hr", attrs: [], serialized: "<hr>" },
  ];

  for (const { tag, attrs, serialized } of handPicked) {
    it(`should serialize dynamically created <${tag}> without closing tag`, () => {
      const node = parseHTML(emptyPage).createElement(tag);
      for (const [name, value] of attrs) {
        node.setAttribute(name, value);
      }
      expect(node.outerHTML).toBe(serialized);
    });
  }

  // Exhaustive pass: every tag in the shared list, created without attributes.
  for (const tagName of VOID_ELEMENTS) {
    it(`should serialize dynamically created <${tagName}> without closing tag`, () => {
      const node = parseHTML(emptyPage).createElement(tagName);
      expect(node.outerHTML).toBe(`<${tagName}>`);
      expect(node.outerHTML).not.toContain(`</${tagName}>`);
    });
  }
});
231
+
232
describe("Void elements with XHTML-style syntax", () => {
  // Self-closing input (`<tag />`) must come back in plain HTML form,
  // with neither the slash nor an end tag.
  const cases = [
    { tag: "br", markup: "<html><body><br /></body></html>", serialized: "<br>" },
    {
      tag: "img",
      markup: '<html><body><img src="test.jpg" /></body></html>',
      serialized: '<img src="test.jpg">',
    },
    {
      tag: "input",
      markup: '<html><body><input type="text" /></body></html>',
      serialized: '<input type="text">',
    },
  ];

  for (const { tag, markup, serialized } of cases) {
    it(`should handle <${tag} /> and serialize without closing tag`, () => {
      const node = parseHTML(markup).querySelector(tag);
      expect(node).not.toBeNull();
      expect(node!.outerHTML).toBe(serialized);
      expect(node!.outerHTML).not.toContain(`</${tag}>`);
    });
  }
});
257
+
258
describe("Non-void elements should have closing tags", () => {
  // Counterpart to the void-element suites: ordinary elements, even when
  // empty, must keep their end tag when serialized.
  const cases = [
    { tag: "div", markup: "<html><body><div></div></body></html>", serialized: "<div></div>" },
    { tag: "span", markup: "<html><body><span></span></body></html>", serialized: "<span></span>" },
    { tag: "p", markup: "<html><body><p></p></body></html>", serialized: "<p></p>" },
    {
      tag: "script",
      markup: "<html><body><script></script></body></html>",
      serialized: "<script></script>",
    },
    {
      tag: "style",
      markup: "<html><head><style></style></head><body></body></html>",
      serialized: "<style></style>",
    },
    {
      tag: "iframe",
      markup: '<html><body><iframe src="page.html"></iframe></body></html>',
      serialized: '<iframe src="page.html"></iframe>',
    },
    {
      tag: "textarea",
      markup: "<html><body><textarea></textarea></body></html>",
      serialized: "<textarea></textarea>",
    },
    { tag: "video", markup: "<html><body><video></video></body></html>", serialized: "<video></video>" },
    { tag: "audio", markup: "<html><body><audio></audio></body></html>", serialized: "<audio></audio>" },
    {
      tag: "canvas",
      markup: "<html><body><canvas></canvas></body></html>",
      serialized: "<canvas></canvas>",
    },
  ];

  for (const { tag, markup, serialized } of cases) {
    it(`should serialize <${tag}> with closing tag`, () => {
      const node = parseHTML(markup).querySelector(tag);
      expect(node).not.toBeNull();
      expect(node!.outerHTML).toBe(serialized);
    });
  }
});
329
+
330
describe("Void elements with content (should be ignored)", () => {
  it("should not include text content in void element", () => {
    // Text that follows <br> must not be pulled into the element's serialization.
    const lineBreak = parseHTML("<html><body><br>text</body></html>").querySelector("br");
    expect(lineBreak).not.toBeNull();
    expect(lineBreak!.outerHTML).toBe("<br>");
  });

  it("should not include innerHTML content in void element", () => {
    const image = parseHTML('<html><body><img src="test.jpg"></body></html>').querySelector("img");
    expect(image).not.toBeNull();
    // innerHTML is checked first, then the full serialization.
    expect(image!.innerHTML).toBe("");
    expect(image!.outerHTML).toBe('<img src="test.jpg">');
  });
});
346
+
347
describe("Void elements in nested structures", () => {
  it("should serialize void elements inside multiple nested elements", () => {
    // Two inputs and a <br> nested under div > form > div.
    const page = parseHTML(`<html><body>
<div class="container">
<form>
<div class="form-group">
<input type="text" name="field1">
<br>
<input type="password" name="field2">
</div>
</form>
</div>
</body></html>`);
    const fields = page.querySelectorAll("input");
    const lineBreak = page.querySelector("br");

    expect(fields.length).toBe(2);
    fields.forEach((field: any) => {
      expect(field.outerHTML).not.toContain("</input>");
    });
    expect(lineBreak!.outerHTML).toBe("<br>");
  });

  it("should serialize void elements inside tables correctly", () => {
    // Two <col> elements in a colgroup plus an <img> inside a table cell.
    const page = parseHTML(`<html><body>
<table>
<colgroup>
<col span="1" class="col1">
<col span="2" class="col2">
</colgroup>
<tr><td><img src="icon.png"></td></tr>
</table>
</body></html>`);
    const columns = page.querySelectorAll("col");
    const image = page.querySelector("img");

    expect(columns.length).toBe(2);
    columns.forEach((column: any) => {
      expect(column.outerHTML).not.toContain("</col>");
    });
    expect(image!.outerHTML).not.toContain("</img>");
  });
});
394
+
395
describe("Edge cases", () => {
  // Attribute oddities on <input>: only the absence of an end tag is asserted.
  const attributeCases = [
    {
      title: "should handle void element with boolean attributes",
      markup: '<html><body><input type="checkbox" checked disabled></body></html>',
    },
    {
      title: "should handle void element with empty attribute value",
      markup: '<html><body><input type="text" value=""></body></html>',
    },
  ];

  for (const { title, markup } of attributeCases) {
    it(title, () => {
      const field = parseHTML(markup).querySelector("input");
      expect(field).not.toBeNull();
      expect(field!.outerHTML).not.toContain("</input>");
    });
  }

  it("should handle uppercase void element tag names", () => {
    const parsed = parseHTML('<html><body><BR><IMG SRC="test.jpg"></body></html>');
    const lineBreak = parsed.querySelector("br");
    const image = parsed.querySelector("img");

    expect(lineBreak).not.toBeNull();
    expect(image).not.toBeNull();
    // Both lower- and upper-case spellings of the end tag are ruled out.
    expect(lineBreak!.outerHTML).not.toContain("</br>");
    expect(lineBreak!.outerHTML).not.toContain("</BR>");
    expect(image!.outerHTML).not.toContain("</img>");
    expect(image!.outerHTML).not.toContain("</IMG>");
  });

  it("should handle mixed case void element tag names", () => {
    const parsed = parseHTML('<html><body><Br><ImG src="test.jpg"></body></html>');
    const lineBreak = parsed.querySelector("br");
    const image = parsed.querySelector("img");

    expect(lineBreak).not.toBeNull();
    expect(image).not.toBeNull();
    // Lower-casing the output makes the check independent of the serialized tag case.
    expect(lineBreak!.outerHTML.toLowerCase()).not.toContain("</br>");
    expect(image!.outerHTML.toLowerCase()).not.toContain("</img>");
  });
});
434
+
435
describe("Full document serialization", () => {
  it("should serialize complete document without closing tags on void elements", () => {
    const page = parseHTML(`<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="style.css">
</head>
<body>
<img src="logo.png" alt="Logo">
<hr>
<form>
<input type="text" name="username">
<br>
<input type="password" name="password">
</form>
</body>
</html>`);
    const serialized = page.documentElement.outerHTML;

    // Check no void elements have closing tags
    const forbiddenClosers = ["</meta>", "</link>", "</img>", "</hr>", "</input>", "</br>"];
    for (const closer of forbiddenClosers) {
      expect(serialized).not.toContain(closer);
    }

    // Check non-void elements still have closing tags
    const requiredClosers = ["</head>", "</body>", "</form>", "</html>"];
    for (const closer of requiredClosers) {
      expect(serialized).toContain(closer);
    }
  });
});
471
+ });