@tkeron/html-parser 0.1.7 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/README.md +1 -7
  2. package/bun.lock +5 -0
  3. package/index.ts +4 -0
  4. package/package.json +7 -1
  5. package/src/css-selector.ts +1 -1
  6. package/src/dom-simulator.ts +38 -16
  7. package/src/encoding.ts +39 -0
  8. package/src/index.ts +9 -0
  9. package/src/parser.ts +478 -144
  10. package/src/serializer.ts +450 -0
  11. package/src/tokenizer.ts +59 -43
  12. package/tests/advanced.test.ts +119 -106
  13. package/tests/custom-elements.test.ts +172 -162
  14. package/tests/dom-extended.test.ts +12 -12
  15. package/tests/dom-manipulation.test.ts +9 -10
  16. package/tests/dom.test.ts +32 -27
  17. package/tests/helpers/tokenizer-adapter.test.ts +70 -0
  18. package/tests/helpers/tokenizer-adapter.ts +65 -0
  19. package/tests/helpers/tree-adapter.test.ts +39 -0
  20. package/tests/helpers/tree-adapter.ts +43 -0
  21. package/tests/html5lib-data/tokenizer/namedEntities.test +42422 -0
  22. package/tests/html5lib-data/tokenizer/pendingSpecChanges.test +9 -0
  23. package/tests/html5lib-data/tree-construction/adoption01.dat +354 -0
  24. package/tests/html5lib-data/tree-construction/adoption02.dat +39 -0
  25. package/tests/html5lib-data/tree-construction/domjs-unsafe.dat +0 -0
  26. package/tests/html5lib-data/tree-construction/entities02.dat +309 -0
  27. package/tests/html5lib-data/tree-construction/html5test-com.dat +301 -0
  28. package/tests/html5lib-data/tree-construction/math.dat +104 -0
  29. package/tests/html5lib-data/tree-construction/namespace-sensitivity.dat +22 -0
  30. package/tests/html5lib-data/tree-construction/noscript01.dat +237 -0
  31. package/tests/html5lib-data/tree-construction/ruby.dat +302 -0
  32. package/tests/html5lib-data/tree-construction/scriptdata01.dat +372 -0
  33. package/tests/html5lib-data/tree-construction/svg.dat +104 -0
  34. package/tests/html5lib-data/tree-construction/template.dat +1673 -0
  35. package/tests/html5lib-data/tree-construction/tests10.dat +853 -0
  36. package/tests/html5lib-data/tree-construction/tests11.dat +523 -0
  37. package/tests/html5lib-data/tree-construction/tests20.dat +842 -0
  38. package/tests/html5lib-data/tree-construction/tests21.dat +306 -0
  39. package/tests/html5lib-data/tree-construction/tests23.dat +168 -0
  40. package/tests/html5lib-data/tree-construction/tests24.dat +79 -0
  41. package/tests/html5lib-data/tree-construction/tests5.dat +210 -0
  42. package/tests/html5lib-data/tree-construction/tests6.dat +663 -0
  43. package/tests/html5lib-data/tree-construction/tests_innerHTML_1.dat +844 -0
  44. package/tests/parser.test.ts +172 -193
  45. package/tests/serializer-core.test.ts +16 -0
  46. package/tests/serializer-data/core.test +125 -0
  47. package/tests/serializer-data/injectmeta.test +66 -0
  48. package/tests/serializer-data/optionaltags.test +965 -0
  49. package/tests/serializer-data/options.test +60 -0
  50. package/tests/serializer-data/whitespace.test +51 -0
  51. package/tests/serializer-injectmeta.test.ts +16 -0
  52. package/tests/serializer-optionaltags.test.ts +16 -0
  53. package/tests/serializer-options.test.ts +16 -0
  54. package/tests/serializer-whitespace.test.ts +16 -0
  55. package/tests/tokenizer-namedEntities.test.ts +20 -0
  56. package/tests/tokenizer-pendingSpecChanges.test.ts +20 -0
  57. package/tests/tokenizer.test.ts +3 -6
  58. package/tests/tree-construction-adoption01.test.ts +37 -0
  59. package/tests/tree-construction-adoption02.test.ts +34 -0
  60. package/tests/tree-construction-domjs-unsafe.test.ts +24 -0
  61. package/tests/tree-construction-entities02.test.ts +33 -0
  62. package/tests/tree-construction-html5test-com.test.ts +24 -0
  63. package/tests/tree-construction-math.test.ts +18 -0
  64. package/tests/tree-construction-namespace-sensitivity.test.ts +18 -0
  65. package/tests/tree-construction-noscript01.test.ts +18 -0
  66. package/tests/tree-construction-ruby.test.ts +21 -0
  67. package/tests/tree-construction-scriptdata01.test.ts +21 -0
  68. package/tests/tree-construction-svg.test.ts +21 -0
  69. package/tests/tree-construction-template.test.ts +21 -0
  70. package/tests/tree-construction-tests10.test.ts +21 -0
  71. package/tests/tree-construction-tests11.test.ts +21 -0
  72. package/tests/tree-construction-tests20.test.ts +18 -0
  73. package/tests/tree-construction-tests21.test.ts +18 -0
  74. package/tests/tree-construction-tests23.test.ts +18 -0
  75. package/tests/tree-construction-tests24.test.ts +18 -0
  76. package/tests/tree-construction-tests5.test.ts +21 -0
  77. package/tests/tree-construction-tests6.test.ts +21 -0
  78. package/tests/tree-construction-tests_innerHTML_1.test.ts +21 -0
  79. package/tests/official/README.md +0 -87
  80. package/tests/official/acid/acid-tests.test.ts +0 -309
  81. package/tests/official/final-output/final-output.test.ts +0 -361
  82. package/tests/official/html5lib/tokenizer-utils.ts +0 -192
  83. package/tests/official/html5lib/tokenizer.test.ts +0 -171
  84. package/tests/official/html5lib/tree-construction-utils.ts +0 -194
  85. package/tests/official/html5lib/tree-construction.test.ts +0 -250
  86. package/tests/official/validator/validator-tests.test.ts +0 -237
  87. package/tests/official/validator-nu/validator-nu.test.ts +0 -335
  88. package/tests/official/whatwg/whatwg-tests.test.ts +0 -205
  89. package/tests/official/wpt/wpt-tests.test.ts +0 -409
@@ -8,28 +8,28 @@ describe('DOM Extended Functionality', () => {
8
8
  describe('innerHTML and outerHTML', () => {
9
9
  it('should generate correct innerHTML for simple elements', () => {
10
10
  const doc = parseHTML('<div>Hello World</div>') as Document;
11
- const div = doc.childNodes[0] as HTMLElement;
11
+ const div = doc.body?.firstChild as HTMLElement;
12
12
 
13
13
  expect(div.innerHTML).toBe('Hello World');
14
14
  });
15
15
 
16
16
  it('should generate correct innerHTML for nested elements', () => {
17
17
  const doc = parseHTML('<div><p>Hello</p><span>World</span></div>') as Document;
18
- const div = doc.childNodes[0] as HTMLElement;
18
+ const div = doc.body?.firstChild as HTMLElement;
19
19
 
20
20
  expect(div.innerHTML).toBe('<p>Hello</p><span>World</span>');
21
21
  });
22
22
 
23
23
  it('should generate correct outerHTML for elements', () => {
24
24
  const doc = parseHTML('<div class="test">Hello</div>') as Document;
25
- const div = doc.childNodes[0] as HTMLElement;
25
+ const div = doc.body?.firstChild as HTMLElement;
26
26
 
27
27
  expect(div.outerHTML).toBe('<div class="test">Hello</div>');
28
28
  });
29
29
 
30
30
  it('should generate correct outerHTML for elements with multiple attributes', () => {
31
31
  const doc = parseHTML('<input type="text" name="username" value="test">') as Document;
32
- const input = doc.childNodes[0] as HTMLElement;
32
+ const input = doc.body?.firstChild as HTMLElement;
33
33
 
34
34
  expect(input.outerHTML).toContain('type="text"');
35
35
  expect(input.outerHTML).toContain('name="username"');
@@ -38,7 +38,7 @@ describe('DOM Extended Functionality', () => {
38
38
 
39
39
  it('should handle comments in innerHTML', () => {
40
40
  const doc = parseHTML('<div><!-- comment -->text</div>') as Document;
41
- const div = doc.childNodes[0] as HTMLElement;
41
+ const div = doc.body?.firstChild as HTMLElement;
42
42
 
43
43
  expect(div.innerHTML).toBe('<!-- comment -->text');
44
44
  });
@@ -47,21 +47,21 @@ describe('DOM Extended Functionality', () => {
47
47
  describe('textContent property', () => {
48
48
  it('should provide textContent on elements', () => {
49
49
  const doc = parseHTML('<div>Hello <span>World</span></div>') as Document;
50
- const div = doc.childNodes[0] as HTMLElement;
50
+ const div = doc.body?.firstChild as HTMLElement;
51
51
 
52
52
  expect(div.textContent).toBe('Hello World');
53
53
  });
54
54
 
55
55
  it('should provide textContent for deeply nested elements', () => {
56
56
  const doc = parseHTML('<div><p><em>Hello</em> <strong>Beautiful</strong></p> <span>World</span></div>') as Document;
57
- const div = doc.childNodes[0] as HTMLElement;
57
+ const div = doc.body?.firstChild as HTMLElement;
58
58
 
59
59
  expect(div.textContent).toBe('Hello Beautiful World');
60
60
  });
61
61
 
62
62
  it('should ignore comments in textContent', () => {
63
63
  const doc = parseHTML('<div>Hello <!-- comment --> World</div>') as Document;
64
- const div = doc.childNodes[0] as HTMLElement;
64
+ const div = doc.body?.firstChild as HTMLElement;
65
65
 
66
66
  expect(div.textContent).toBe('Hello World');
67
67
  });
@@ -70,7 +70,7 @@ describe('DOM Extended Functionality', () => {
70
70
  describe('element navigation properties', () => {
71
71
  it('should provide parentElement property', () => {
72
72
  const doc = parseHTML('<div><p>Hello</p></div>') as Document;
73
- const div = doc.childNodes[0] as HTMLElement;
73
+ const div = doc.body?.firstChild as HTMLElement;
74
74
  const p = div.children[0];
75
75
 
76
76
  expect(p).toBeDefined();
@@ -79,7 +79,7 @@ describe('DOM Extended Functionality', () => {
79
79
 
80
80
  it('should provide firstElementChild and lastElementChild', () => {
81
81
  const doc = parseHTML('<div><span>First</span><p>Second</p><em>Last</em></div>') as Document;
82
- const div = doc.childNodes[0] as HTMLElement;
82
+ const div = doc.body?.firstChild as HTMLElement;
83
83
 
84
84
  expect(div.firstElementChild?.tagName).toBe('SPAN');
85
85
  expect(div.lastElementChild?.tagName).toBe('EM');
@@ -87,7 +87,7 @@ describe('DOM Extended Functionality', () => {
87
87
 
88
88
  it('should provide nextElementSibling and previousElementSibling', () => {
89
89
  const doc = parseHTML('<div><span>First</span><p>Second</p><em>Last</em></div>') as Document;
90
- const div = doc.childNodes[0] as HTMLElement;
90
+ const div = doc.body?.firstChild as HTMLElement;
91
91
  const span = div.children[0];
92
92
  const p = div.children[1];
93
93
  const em = div.children[2];
@@ -111,7 +111,7 @@ describe('DOM Extended Functionality', () => {
111
111
  describe('setInnerHTML functionality', () => {
112
112
  it('should clear existing content when setting innerHTML', () => {
113
113
  const doc = parseHTML('<div><p>Old content</p></div>') as Document;
114
- const div = doc.childNodes[0] as HTMLElement;
114
+ const div = doc.body?.firstChild as HTMLElement;
115
115
 
116
116
  setInnerHTML(div, 'New content');
117
117
 
@@ -897,18 +897,18 @@ describe("DOM Manipulation - prepend", () => {
897
897
 
898
898
  describe("prepend on document", () => {
899
899
  it("should prepend to document", () => {
900
- const doc = parseHTML("<!-- Comment --><div>Content</div>");
901
- const comment = doc.childNodes[0];
902
- const div = doc.childNodes[1];
900
+ const doc = parseHTML("<div>Content</div>");
901
+ const initialChildCount = doc.childNodes.length;
902
+ const firstChild = doc.firstChild;
903
903
 
904
904
  const newDiv = doc.createElement("div");
905
- newDiv.textContent = "First";
905
+ newDiv.textContent = "Prepended";
906
906
 
907
907
  doc.prepend(newDiv);
908
908
 
909
909
  expect(doc.firstChild).toBe(newDiv);
910
- expect(newDiv.nextSibling).toBe(comment);
911
- expect(doc.childNodes.length).toBe(3);
910
+ expect(newDiv.nextSibling).toBe(firstChild);
911
+ expect(doc.childNodes.length).toBe(initialChildCount + 1);
912
912
  });
913
913
  });
914
914
  });
@@ -1121,16 +1121,15 @@ describe("DOM Manipulation - append", () => {
1121
1121
  describe("append on document", () => {
1122
1122
  it("should append to document", () => {
1123
1123
  const doc = parseHTML("<div>Content</div>");
1124
- const div = doc.childNodes[0];
1124
+ const initialChildCount = doc.childNodes.length;
1125
1125
 
1126
1126
  const newDiv = doc.createElement("div");
1127
- newDiv.textContent = "Last";
1127
+ newDiv.textContent = "Appended";
1128
1128
 
1129
1129
  doc.append(newDiv);
1130
1130
 
1131
1131
  expect(doc.lastChild).toBe(newDiv);
1132
- expect(div.nextSibling).toBe(newDiv);
1133
- expect(doc.childNodes.length).toBe(2);
1132
+ expect(doc.childNodes.length).toBe(initialChildCount + 1);
1134
1133
  });
1135
1134
  });
1136
1135
  });
package/tests/dom.test.ts CHANGED
@@ -10,6 +10,10 @@ import {
10
10
  } from "../src/dom-simulator";
11
11
  import { parse } from "../src/parser";
12
12
 
13
+ function getBodyContent(doc: any): any { // Test helper: returns the first child node of the document's <body>.
14
+ return doc.body?.firstChild; // optional chaining: evaluates to undefined when doc.body is null/undefined
15
+ }
16
+
13
17
  describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
14
18
  describe("parseHTML basic functionality", () => {
15
19
  it("should return a Document object", () => {
@@ -21,8 +25,7 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
21
25
  it("should parse simple HTML elements", () => {
22
26
  const doc = parseHTML("<p>Hello World</p>");
23
27
 
24
- expect(doc.childNodes.length).toBe(1);
25
- const paragraph = doc.childNodes[0]!;
28
+ const paragraph = getBodyContent(doc);
26
29
 
27
30
  expect(paragraph.nodeType).toBe(NodeType.ELEMENT_NODE);
28
31
  expect(paragraph.nodeName).toBe("P");
@@ -31,7 +34,7 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
31
34
 
32
35
  it("should parse text content correctly", () => {
33
36
  const doc = parseHTML("<p>Hello World</p>");
34
- const paragraph = doc.childNodes[0]!;
37
+ const paragraph = getBodyContent(doc);
35
38
 
36
39
  expect(paragraph.childNodes.length).toBe(1);
37
40
  const textNode = paragraph.childNodes[0]!;
@@ -44,7 +47,7 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
44
47
  it("should parse nested elements", () => {
45
48
  const doc = parseHTML("<div><p>Hello</p><span>World</span></div>");
46
49
 
47
- const div = doc.childNodes[0]!;
50
+ const div = getBodyContent(doc);
48
51
  expect(div.nodeName).toBe("DIV");
49
52
  expect(div.childNodes.length).toBe(2);
50
53
 
@@ -57,17 +60,19 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
57
60
 
58
61
  it("should handle attributes correctly", () => {
59
62
  const doc = parseHTML('<p id="test" class="highlight">Content</p>');
60
- const paragraph = doc.childNodes[0]! as any;
63
+ const paragraph = getBodyContent(doc) as any;
61
64
 
62
65
  expect(paragraph.attributes.id).toBe("test");
63
66
  expect(paragraph.attributes.class).toBe("highlight");
64
67
  });
65
68
 
66
69
  it("should parse comments", () => {
67
- const doc = parseHTML("<!-- This is a comment --><p>Hello</p>");
70
+ const doc = parseHTML("<div><!-- This is a comment --></div><p>Hello</p>");
68
71
 
69
- expect(doc.childNodes.length).toBe(2);
70
- const comment = doc.childNodes[0]!;
72
+ const body = doc.body;
73
+ expect(body.childNodes.length).toBe(2);
74
+ const div = body.childNodes[0]!;
75
+ const comment = div.childNodes[0]!;
71
76
 
72
77
  expect(comment.nodeType).toBe(NodeType.COMMENT_NODE);
73
78
  expect(comment.nodeName).toBe("#comment");
@@ -77,11 +82,11 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
77
82
  it("should set parent-child relationships correctly", () => {
78
83
  const doc = parseHTML("<div><p>Hello</p></div>");
79
84
 
80
- const div = doc.childNodes[0]!;
85
+ const div = getBodyContent(doc);
81
86
  const p = div.childNodes[0]!;
82
87
 
83
88
  expect(p.parentNode).toBe(<any>div);
84
- expect(div.parentNode).toBe(doc);
89
+ expect(div.parentNode).toBe(doc.body);
85
90
  expect(div.firstChild).toBe(p);
86
91
  expect(div.lastChild).toBe(p);
87
92
  });
@@ -91,7 +96,7 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
91
96
  "<div><p>First</p><span>Second</span><em>Third</em></div>"
92
97
  );
93
98
 
94
- const div = doc.childNodes[0]!;
99
+ const div = getBodyContent(doc);
95
100
  const p = div.childNodes[0]!;
96
101
  const span = div.childNodes[1]!;
97
102
  const em = div.childNodes[2]!;
@@ -108,7 +113,7 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
108
113
  it("should handle self-closing elements", () => {
109
114
  const doc = parseHTML("<p>Before<br/>After</p>");
110
115
 
111
- const p = doc.childNodes[0]!;
116
+ const p = getBodyContent(doc);
112
117
  expect(p.childNodes.length).toBe(3);
113
118
 
114
119
  const br = p.childNodes[1]!;
@@ -119,7 +124,7 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
119
124
  it("should handle empty elements", () => {
120
125
  const doc = parseHTML("<div></div>");
121
126
 
122
- const div = doc.childNodes[0]!;
127
+ const div = getBodyContent(doc);
123
128
  expect(div.childNodes.length).toBe(0);
124
129
  expect(div.firstChild).toBeNull();
125
130
  expect(div.lastChild).toBeNull();
@@ -154,7 +159,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
154
159
  describe("getTextContent", () => {
155
160
  it("should get text content from a simple text node", () => {
156
161
  const doc = parseHTML("<p>Hello World</p>");
157
- const p = doc.childNodes[0]!;
162
+ const p = getBodyContent(doc);
158
163
  const textNode = p.childNodes[0]!;
159
164
 
160
165
  expect(getTextContent(textNode)).toBe("Hello World");
@@ -162,14 +167,14 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
162
167
 
163
168
  it("should get text content from an element with text", () => {
164
169
  const doc = parseHTML("<p>Hello World</p>");
165
- const p = doc.childNodes[0]!;
170
+ const p = getBodyContent(doc);
166
171
 
167
172
  expect(getTextContent(p)).toBe("Hello World");
168
173
  });
169
174
 
170
175
  it("should get concatenated text from nested elements", () => {
171
176
  const doc = parseHTML("<div>Hello <span>beautiful</span> world</div>");
172
- const div = doc.childNodes[0]!;
177
+ const div = getBodyContent(doc);
173
178
 
174
179
  expect(getTextContent(div)).toBe("Hello beautiful world");
175
180
  });
@@ -178,28 +183,28 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
178
183
  const doc = parseHTML(
179
184
  "<div>Start <p>Middle <em>Deep <strong>Deeper</strong></em></p> End</div>"
180
185
  );
181
- const div = doc.childNodes[0]!;
186
+ const div = getBodyContent(doc);
182
187
 
183
188
  expect(getTextContent(div)).toBe("Start Middle Deep Deeper End");
184
189
  });
185
190
 
186
191
  it("should return empty string for elements with no text", () => {
187
192
  const doc = parseHTML("<div></div>");
188
- const div = doc.childNodes[0]!;
193
+ const div = getBodyContent(doc);
189
194
 
190
195
  expect(getTextContent(div)).toBe("");
191
196
  });
192
197
 
193
198
  it("should ignore comments when getting text content", () => {
194
199
  const doc = parseHTML("<div>Before<!-- comment -->After</div>");
195
- const div = doc.childNodes[0]!;
200
+ const div = getBodyContent(doc);
196
201
 
197
202
  expect(getTextContent(div)).toBe("BeforeAfter");
198
203
  });
199
204
 
200
205
  it("should handle mixed content with self-closing elements", () => {
201
206
  const doc = parseHTML("<p>Before<br/>After</p>");
202
- const p = doc.childNodes[0]!;
207
+ const p = getBodyContent(doc);
203
208
 
204
209
  expect(getTextContent(p)).toBe("BeforeAfter");
205
210
  });
@@ -210,7 +215,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
210
215
  const doc = parseHTML(
211
216
  '<div id="test" class="highlight" data-value="123">Content</div>'
212
217
  );
213
- const div = doc.childNodes[0]! as any;
218
+ const div = getBodyContent(doc) as any;
214
219
 
215
220
  expect(getAttribute(div, "id")).toBe("test");
216
221
  expect(getAttribute(div, "class")).toBe("highlight");
@@ -219,7 +224,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
219
224
 
220
225
  it("should return null for non-existing attributes", () => {
221
226
  const doc = parseHTML('<div id="test">Content</div>');
222
- const div = doc.childNodes[0]! as any;
227
+ const div = getBodyContent(doc) as any;
223
228
 
224
229
  expect(getAttribute(div, "nonexistent")).toBeNull();
225
230
  expect(getAttribute(div, "class")).toBeNull();
@@ -227,7 +232,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
227
232
 
228
233
  it("should check if attributes exist", () => {
229
234
  const doc = parseHTML('<div id="test" class="highlight">Content</div>');
230
- const div = doc.childNodes[0]! as any;
235
+ const div = getBodyContent(doc) as any;
231
236
 
232
237
  expect(hasAttribute(div, "id")).toBe(true);
233
238
  expect(hasAttribute(div, "class")).toBe(true);
@@ -236,7 +241,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
236
241
 
237
242
  it("should set new attributes", () => {
238
243
  const doc = parseHTML("<div>Content</div>");
239
- const div = doc.childNodes[0]! as any;
244
+ const div = getBodyContent(doc) as any;
240
245
 
241
246
  setAttribute(div, "id", "new-id");
242
247
  setAttribute(div, "class", "new-class");
@@ -249,7 +254,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
249
254
 
250
255
  it("should update existing attributes", () => {
251
256
  const doc = parseHTML('<div id="old-id" class="old-class">Content</div>');
252
- const div = doc.childNodes[0]! as any;
257
+ const div = getBodyContent(doc) as any;
253
258
 
254
259
  setAttribute(div, "id", "new-id");
255
260
  setAttribute(div, "class", "new-class");
@@ -262,7 +267,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
262
267
  const doc = parseHTML(
263
268
  '<div id="test" class="highlight" data-value="123">Content</div>'
264
269
  );
265
- const div = doc.childNodes[0]! as any;
270
+ const div = getBodyContent(doc) as any;
266
271
 
267
272
  removeAttribute(div, "class");
268
273
  removeAttribute(div, "data-value");
@@ -276,7 +281,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
276
281
 
277
282
  it("should handle removing non-existing attributes gracefully", () => {
278
283
  const doc = parseHTML('<div id="test">Content</div>');
279
- const div = doc.childNodes[0]! as any;
284
+ const div = getBodyContent(doc) as any;
280
285
 
281
286
  removeAttribute(div, "nonexistent");
282
287
 
@@ -0,0 +1,70 @@
1
+ import { tokenize } from '../../src/tokenizer';
2
+ import { adaptTokens, type Html5libToken } from './tokenizer-adapter.ts';
3
+
4
+ describe('Tokenizer Adapter Tests', () => { // Each case tokenizes a small input and checks the tuple list produced by adaptTokens.
5
+ it('should adapt simple start tag', () => {
6
+ const tokens = tokenize('<div>');
7
+ const adapted = adaptTokens(tokens);
8
+ expect(adapted).toEqual([
9
+ ['StartTag', 'div', {}]
10
+ ]);
11
+ });
12
+
13
+ it('should adapt start tag with attributes', () => {
14
+ const tokens = tokenize('<div class="foo" id="bar">');
15
+ const adapted = adaptTokens(tokens);
16
+ expect(adapted).toEqual([
17
+ ['StartTag', 'div', { class: 'foo', id: 'bar' }]
18
+ ]);
19
+ });
20
+
21
+ it('should adapt self-closing tag', () => {
22
+ const tokens = tokenize('<br/>');
23
+ const adapted = adaptTokens(tokens);
24
+ expect(adapted).toEqual([
25
+ ['StartTag', 'br', {}, true] // the trailing `true` is the self-closing flag
26
+ ]);
27
+ });
28
+
29
+ it('should adapt end tag', () => {
30
+ const tokens = tokenize('</div>');
31
+ const adapted = adaptTokens(tokens);
32
+ expect(adapted).toEqual([
33
+ ['EndTag', 'div']
34
+ ]);
35
+ });
36
+
37
+ it('should adapt text', () => {
38
+ const tokens = tokenize('hello world');
39
+ const adapted = adaptTokens(tokens);
40
+ expect(adapted).toEqual([
41
+ ['Character', 'hello world']
42
+ ]);
43
+ });
44
+
45
+ it('should adapt comment', () => {
46
+ const tokens = tokenize('<!-- comment -->');
47
+ const adapted = adaptTokens(tokens);
48
+ expect(adapted).toEqual([
49
+ ['Comment', ' comment ']
50
+ ]);
51
+ });
52
+
53
+ it('should adapt DOCTYPE', () => {
54
+ const tokens = tokenize('<!DOCTYPE html>');
55
+ const adapted = adaptTokens(tokens);
56
+ expect(adapted).toEqual([
57
+ ['DOCTYPE', 'html', null, null, true] // adaptTokens always emits null, null, true after the name
58
+ ]);
59
+ });
60
+
61
+ it('should adapt mixed content', () => {
62
+ const tokens = tokenize('<div>hello</div>');
63
+ const adapted = adaptTokens(tokens);
64
+ expect(adapted).toEqual([
65
+ ['StartTag', 'div', {}],
66
+ ['Character', 'hello'],
67
+ ['EndTag', 'div']
68
+ ]);
69
+ });
70
+ });
@@ -0,0 +1,65 @@
1
+ // tests/helpers/tokenizer-adapter.ts
2
+
3
+ import type { Token } from '../../src/tokenizer';
4
+
5
+ export type Html5libToken =
6
+ | ['StartTag', string, Record<string, string>]
7
+ | ['StartTag', string, Record<string, string>, boolean] // with self-closing flag
8
+ | ['EndTag', string]
9
+ | ['Character', string]
10
+ | ['Comment', string]
11
+ | ['DOCTYPE', string, string | null, string | null, boolean];
12
+
13
+ export function adaptTokens(tokens: Token[]): Html5libToken[] { // Converts tokenizer tokens into Html5libToken tuples, preserving input order.
14
+ const result: Html5libToken[] = [];
15
+
16
+ for (const token of tokens) {
17
+ if (token.type === 'EOF') continue; // EOF tokens produce no output entry
18
+
19
+ switch (token.type) {
20
+ case 'TAG_OPEN':
21
+ if (token.isClosing) {
22
+ result.push(['EndTag', token.value]);
23
+ } else {
24
+ const attrs = token.attributes || {};
25
+ if (token.isSelfClosing) {
26
+ result.push(['StartTag', token.value, attrs, true]);
27
+ } else {
28
+ result.push(['StartTag', token.value, attrs]);
29
+ }
30
+ }
31
+ break;
32
+
33
+ case 'TAG_CLOSE':
34
+ result.push(['EndTag', token.value]);
35
+ break;
36
+
37
+ case 'TEXT':
38
+ result.push(['Character', token.value]);
39
+ break;
40
+
41
+ case 'COMMENT':
42
+ result.push(['Comment', token.value]);
43
+ break;
44
+
45
+ case 'DOCTYPE':
46
+ // TODO: parse DOCTYPE to extract name, publicId, systemId (for now token.value is passed through as the name, with null ids and a fixed `true` flag)
47
+ result.push(['DOCTYPE', token.value, null, null, true]);
48
+ break;
49
+
50
+ case 'CDATA':
51
+ result.push(['Character', token.value]); // CDATA content is emitted as a Character entry
52
+ break;
53
+ }
54
+ }
55
+
56
+ return result;
57
+ }
58
+
59
+ // Function to compare tokens, handling special cases
60
+ export function compareTokens(actual: Html5libToken[], expected: any[]): boolean {
61
+ // TODO: implement flexible comparison (not implemented yet):
62
+ // - Coalesce consecutive Character tokens
63
+ // - Ignore whitespace differences in some cases
64
+ return JSON.stringify(actual) === JSON.stringify(expected); // currently a strict equality check on the JSON serializations
65
+ }
@@ -0,0 +1,39 @@
1
+ import { parseHTML } from '../../index.ts';
2
+ import { serializeToHtml5lib } from './tree-adapter.ts';
3
+
4
+ describe('Tree Adapter Tests', () => { // Each case parses a small document and checks substrings of the serializeToHtml5lib output.
5
+ it('should serialize simple element', () => {
6
+ const doc = parseHTML('<div></div>');
7
+ const serialized = serializeToHtml5lib(doc);
8
+ expect(serialized).toContain('| <html>');
9
+ expect(serialized).toContain('| <body>');
10
+ expect(serialized).toContain('| <div>');
11
+ });
12
+
13
+ it('should serialize element with attributes', () => {
14
+ const doc = parseHTML('<div class="foo" id="bar"></div>');
15
+ const serialized = serializeToHtml5lib(doc);
16
+ expect(serialized).toContain('<div>');
17
+ expect(serialized).toContain('class="foo"');
18
+ expect(serialized).toContain('id="bar"');
19
+ });
20
+
21
+ it('should serialize text content', () => {
22
+ const doc = parseHTML('<div>hello</div>');
23
+ const serialized = serializeToHtml5lib(doc);
24
+ expect(serialized).toContain('"hello"');
25
+ });
26
+
27
+ it('should serialize comment', () => {
28
+ const doc = parseHTML('<div><!-- comment --></div>');
29
+ const serialized = serializeToHtml5lib(doc);
30
+ expect(serialized).toContain('<!-- -->'); // NOTE(review): the serializer emits `<!-- ${node.textContent} -->`; confirm this expected substring against the comment text ' comment '
31
+ });
32
+
33
+ it('should serialize DOCTYPE', () => {
34
+ const doc = parseHTML('<!DOCTYPE html><div></div>');
35
+ const serialized = serializeToHtml5lib(doc);
36
+ expect(serialized).toContain('<!DOCTYPE html>');
37
+ expect(serialized).toContain('<div>');
38
+ });
39
+ });
@@ -0,0 +1,43 @@
1
+ // tests/helpers/tree-adapter.ts
2
+
3
+ export function serializeToHtml5lib(doc: any): string { // Renders a node tree as "| "-prefixed lines joined with "\n", plus a trailing newline.
4
+ const lines: string[] = [];
5
+
6
+ function serialize(node: any, depth: number): void { // appends the lines for `node` and its descendants to `lines`
7
+ const indent = '| ' + ' '.repeat(depth);
8
+
9
+ if (node.nodeType === 9) { // DOCUMENT: emits no line of its own; children keep the same depth
10
+ for (const child of node.childNodes || []) {
11
+ serialize(child, depth);
12
+ }
13
+ } else if (node.nodeType === 1) { // ELEMENT: tag name is lower-cased
14
+ lines.push(`${indent}<${node.tagName.toLowerCase()}>`);
15
+
16
+ // Attributes in alphabetical order (NOTE(review): localeCompare is locale-sensitive; confirm it matches the ordering the expected data uses)
17
+ const attrs = Object.entries(node.attributes || {}).sort(([a], [b]) => a.localeCompare(b));
18
+ for (const [name, value] of attrs) {
19
+ lines.push(`${indent} ${name}="${value}"`);
20
+ }
21
+
22
+ // Template special case: a "content" line, then node.content serialized at depth + 2
23
+ if (node.tagName.toLowerCase() === 'template' && node.content) {
24
+ lines.push(`${indent} content`);
25
+ serialize(node.content, depth + 2);
26
+ }
27
+
28
+ // Children, one level deeper than the element
29
+ for (const child of node.childNodes || []) {
30
+ serialize(child, depth + 1);
31
+ }
32
+ } else if (node.nodeType === 3) { // TEXT: textContent wrapped in double quotes, no escaping applied
33
+ lines.push(`${indent}"${node.textContent}"`);
34
+ } else if (node.nodeType === 8) { // COMMENT
35
+ lines.push(`${indent}<!-- ${node.textContent} -->`);
36
+ } else if (node.nodeType === 10) { // DOCTYPE: name falls back to 'html' when node.name is falsy
37
+ lines.push(`${indent}<!DOCTYPE ${node.name || 'html'}>`);
38
+ }
39
+ }
40
+
41
+ serialize(doc, 0);
42
+ return lines.join('\n') + '\n';
43
+ }