@tkeron/html-parser 0.1.7 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -7
- package/bun.lock +5 -0
- package/index.ts +4 -0
- package/package.json +7 -1
- package/src/css-selector.ts +1 -1
- package/src/dom-simulator.ts +41 -17
- package/src/encoding.ts +39 -0
- package/src/index.ts +9 -0
- package/src/parser.ts +509 -143
- package/src/serializer.ts +450 -0
- package/src/tokenizer.ts +190 -118
- package/tests/advanced.test.ts +121 -108
- package/tests/custom-elements-head.test.ts +105 -0
- package/tests/dom-extended.test.ts +12 -12
- package/tests/dom-manipulation.test.ts +9 -10
- package/tests/dom.test.ts +32 -27
- package/tests/helpers/tokenizer-adapter.test.ts +70 -0
- package/tests/helpers/tokenizer-adapter.ts +65 -0
- package/tests/helpers/tree-adapter.test.ts +39 -0
- package/tests/helpers/tree-adapter.ts +60 -0
- package/tests/html5lib-data/tokenizer/namedEntities.test +42422 -0
- package/tests/html5lib-data/tokenizer/pendingSpecChanges.test +9 -0
- package/tests/html5lib-data/tree-construction/adoption01.dat +354 -0
- package/tests/html5lib-data/tree-construction/adoption02.dat +39 -0
- package/tests/html5lib-data/tree-construction/domjs-unsafe.dat +0 -0
- package/tests/html5lib-data/tree-construction/entities02.dat +309 -0
- package/tests/html5lib-data/tree-construction/html5test-com.dat +301 -0
- package/tests/html5lib-data/tree-construction/math.dat +104 -0
- package/tests/html5lib-data/tree-construction/namespace-sensitivity.dat +22 -0
- package/tests/html5lib-data/tree-construction/noscript01.dat +237 -0
- package/tests/html5lib-data/tree-construction/ruby.dat +302 -0
- package/tests/html5lib-data/tree-construction/scriptdata01.dat +372 -0
- package/tests/html5lib-data/tree-construction/svg.dat +104 -0
- package/tests/html5lib-data/tree-construction/template.dat +1673 -0
- package/tests/html5lib-data/tree-construction/tests10.dat +853 -0
- package/tests/html5lib-data/tree-construction/tests11.dat +523 -0
- package/tests/html5lib-data/tree-construction/tests20.dat +842 -0
- package/tests/html5lib-data/tree-construction/tests21.dat +306 -0
- package/tests/html5lib-data/tree-construction/tests23.dat +168 -0
- package/tests/html5lib-data/tree-construction/tests24.dat +79 -0
- package/tests/html5lib-data/tree-construction/tests5.dat +210 -0
- package/tests/html5lib-data/tree-construction/tests6.dat +663 -0
- package/tests/html5lib-data/tree-construction/tests_innerHTML_1.dat +844 -0
- package/tests/parser.test.ts +173 -193
- package/tests/serializer-core.test.ts +16 -0
- package/tests/serializer-data/core.test +125 -0
- package/tests/serializer-data/injectmeta.test +66 -0
- package/tests/serializer-data/optionaltags.test +965 -0
- package/tests/serializer-data/options.test +60 -0
- package/tests/serializer-data/whitespace.test +51 -0
- package/tests/serializer-injectmeta.test.ts +16 -0
- package/tests/serializer-optionaltags.test.ts +16 -0
- package/tests/serializer-options.test.ts +16 -0
- package/tests/serializer-whitespace.test.ts +16 -0
- package/tests/tokenizer-namedEntities.test.ts +20 -0
- package/tests/tokenizer-pendingSpecChanges.test.ts +20 -0
- package/tests/tokenizer.test.ts +25 -32
- package/tests/tree-construction-adoption01.test.ts +37 -0
- package/tests/tree-construction-adoption02.test.ts +34 -0
- package/tests/tree-construction-domjs-unsafe.test.ts +24 -0
- package/tests/tree-construction-entities02.test.ts +33 -0
- package/tests/tree-construction-html5test-com.test.ts +32 -0
- package/tests/tree-construction-math.test.ts +18 -0
- package/tests/tree-construction-namespace-sensitivity.test.ts +18 -0
- package/tests/tree-construction-noscript01.test.ts +18 -0
- package/tests/tree-construction-ruby.test.ts +21 -0
- package/tests/tree-construction-scriptdata01.test.ts +21 -0
- package/tests/tree-construction-svg.test.ts +21 -0
- package/tests/tree-construction-template.test.ts +21 -0
- package/tests/tree-construction-tests10.test.ts +21 -0
- package/tests/tree-construction-tests11.test.ts +21 -0
- package/tests/tree-construction-tests20.test.ts +18 -0
- package/tests/tree-construction-tests21.test.ts +18 -0
- package/tests/tree-construction-tests23.test.ts +18 -0
- package/tests/tree-construction-tests24.test.ts +18 -0
- package/tests/tree-construction-tests5.test.ts +21 -0
- package/tests/tree-construction-tests6.test.ts +21 -0
- package/tests/tree-construction-tests_innerHTML_1.test.ts +21 -0
- package/tests/custom-elements.test.ts +0 -745
- package/tests/official/README.md +0 -87
- package/tests/official/acid/acid-tests.test.ts +0 -309
- package/tests/official/final-output/final-output.test.ts +0 -361
- package/tests/official/html5lib/tokenizer-utils.ts +0 -192
- package/tests/official/html5lib/tokenizer.test.ts +0 -171
- package/tests/official/html5lib/tree-construction-utils.ts +0 -194
- package/tests/official/html5lib/tree-construction.test.ts +0 -250
- package/tests/official/validator/validator-tests.test.ts +0 -237
- package/tests/official/validator-nu/validator-nu.test.ts +0 -335
- package/tests/official/whatwg/whatwg-tests.test.ts +0 -205
- package/tests/official/wpt/wpt-tests.test.ts +0 -409
|
@@ -8,28 +8,28 @@ describe('DOM Extended Functionality', () => {
|
|
|
8
8
|
describe('innerHTML and outerHTML', () => {
|
|
9
9
|
it('should generate correct innerHTML for simple elements', () => {
|
|
10
10
|
const doc = parseHTML('<div>Hello World</div>') as Document;
|
|
11
|
-
const div = doc.
|
|
11
|
+
const div = doc.body?.firstChild as HTMLElement;
|
|
12
12
|
|
|
13
13
|
expect(div.innerHTML).toBe('Hello World');
|
|
14
14
|
});
|
|
15
15
|
|
|
16
16
|
it('should generate correct innerHTML for nested elements', () => {
|
|
17
17
|
const doc = parseHTML('<div><p>Hello</p><span>World</span></div>') as Document;
|
|
18
|
-
const div = doc.
|
|
18
|
+
const div = doc.body?.firstChild as HTMLElement;
|
|
19
19
|
|
|
20
20
|
expect(div.innerHTML).toBe('<p>Hello</p><span>World</span>');
|
|
21
21
|
});
|
|
22
22
|
|
|
23
23
|
it('should generate correct outerHTML for elements', () => {
|
|
24
24
|
const doc = parseHTML('<div class="test">Hello</div>') as Document;
|
|
25
|
-
const div = doc.
|
|
25
|
+
const div = doc.body?.firstChild as HTMLElement;
|
|
26
26
|
|
|
27
27
|
expect(div.outerHTML).toBe('<div class="test">Hello</div>');
|
|
28
28
|
});
|
|
29
29
|
|
|
30
30
|
it('should generate correct outerHTML for elements with multiple attributes', () => {
|
|
31
31
|
const doc = parseHTML('<input type="text" name="username" value="test">') as Document;
|
|
32
|
-
const input = doc.
|
|
32
|
+
const input = doc.body?.firstChild as HTMLElement;
|
|
33
33
|
|
|
34
34
|
expect(input.outerHTML).toContain('type="text"');
|
|
35
35
|
expect(input.outerHTML).toContain('name="username"');
|
|
@@ -38,7 +38,7 @@ describe('DOM Extended Functionality', () => {
|
|
|
38
38
|
|
|
39
39
|
it('should handle comments in innerHTML', () => {
|
|
40
40
|
const doc = parseHTML('<div><!-- comment -->text</div>') as Document;
|
|
41
|
-
const div = doc.
|
|
41
|
+
const div = doc.body?.firstChild as HTMLElement;
|
|
42
42
|
|
|
43
43
|
expect(div.innerHTML).toBe('<!-- comment -->text');
|
|
44
44
|
});
|
|
@@ -47,21 +47,21 @@ describe('DOM Extended Functionality', () => {
|
|
|
47
47
|
describe('textContent property', () => {
|
|
48
48
|
it('should provide textContent on elements', () => {
|
|
49
49
|
const doc = parseHTML('<div>Hello <span>World</span></div>') as Document;
|
|
50
|
-
const div = doc.
|
|
50
|
+
const div = doc.body?.firstChild as HTMLElement;
|
|
51
51
|
|
|
52
52
|
expect(div.textContent).toBe('Hello World');
|
|
53
53
|
});
|
|
54
54
|
|
|
55
55
|
it('should provide textContent for deeply nested elements', () => {
|
|
56
56
|
const doc = parseHTML('<div><p><em>Hello</em> <strong>Beautiful</strong></p> <span>World</span></div>') as Document;
|
|
57
|
-
const div = doc.
|
|
57
|
+
const div = doc.body?.firstChild as HTMLElement;
|
|
58
58
|
|
|
59
59
|
expect(div.textContent).toBe('Hello Beautiful World');
|
|
60
60
|
});
|
|
61
61
|
|
|
62
62
|
it('should ignore comments in textContent', () => {
|
|
63
63
|
const doc = parseHTML('<div>Hello <!-- comment --> World</div>') as Document;
|
|
64
|
-
const div = doc.
|
|
64
|
+
const div = doc.body?.firstChild as HTMLElement;
|
|
65
65
|
|
|
66
66
|
expect(div.textContent).toBe('Hello World');
|
|
67
67
|
});
|
|
@@ -70,7 +70,7 @@ describe('DOM Extended Functionality', () => {
|
|
|
70
70
|
describe('element navigation properties', () => {
|
|
71
71
|
it('should provide parentElement property', () => {
|
|
72
72
|
const doc = parseHTML('<div><p>Hello</p></div>') as Document;
|
|
73
|
-
const div = doc.
|
|
73
|
+
const div = doc.body?.firstChild as HTMLElement;
|
|
74
74
|
const p = div.children[0];
|
|
75
75
|
|
|
76
76
|
expect(p).toBeDefined();
|
|
@@ -79,7 +79,7 @@ describe('DOM Extended Functionality', () => {
|
|
|
79
79
|
|
|
80
80
|
it('should provide firstElementChild and lastElementChild', () => {
|
|
81
81
|
const doc = parseHTML('<div><span>First</span><p>Second</p><em>Last</em></div>') as Document;
|
|
82
|
-
const div = doc.
|
|
82
|
+
const div = doc.body?.firstChild as HTMLElement;
|
|
83
83
|
|
|
84
84
|
expect(div.firstElementChild?.tagName).toBe('SPAN');
|
|
85
85
|
expect(div.lastElementChild?.tagName).toBe('EM');
|
|
@@ -87,7 +87,7 @@ describe('DOM Extended Functionality', () => {
|
|
|
87
87
|
|
|
88
88
|
it('should provide nextElementSibling and previousElementSibling', () => {
|
|
89
89
|
const doc = parseHTML('<div><span>First</span><p>Second</p><em>Last</em></div>') as Document;
|
|
90
|
-
const div = doc.
|
|
90
|
+
const div = doc.body?.firstChild as HTMLElement;
|
|
91
91
|
const span = div.children[0];
|
|
92
92
|
const p = div.children[1];
|
|
93
93
|
const em = div.children[2];
|
|
@@ -111,7 +111,7 @@ describe('DOM Extended Functionality', () => {
|
|
|
111
111
|
describe('setInnerHTML functionality', () => {
|
|
112
112
|
it('should clear existing content when setting innerHTML', () => {
|
|
113
113
|
const doc = parseHTML('<div><p>Old content</p></div>') as Document;
|
|
114
|
-
const div = doc.
|
|
114
|
+
const div = doc.body?.firstChild as HTMLElement;
|
|
115
115
|
|
|
116
116
|
setInnerHTML(div, 'New content');
|
|
117
117
|
|
|
@@ -897,18 +897,18 @@ describe("DOM Manipulation - prepend", () => {
|
|
|
897
897
|
|
|
898
898
|
describe("prepend on document", () => {
|
|
899
899
|
it("should prepend to document", () => {
|
|
900
|
-
const doc = parseHTML("
|
|
901
|
-
const
|
|
902
|
-
const
|
|
900
|
+
const doc = parseHTML("<div>Content</div>");
|
|
901
|
+
const initialChildCount = doc.childNodes.length;
|
|
902
|
+
const firstChild = doc.firstChild;
|
|
903
903
|
|
|
904
904
|
const newDiv = doc.createElement("div");
|
|
905
|
-
newDiv.textContent = "
|
|
905
|
+
newDiv.textContent = "Prepended";
|
|
906
906
|
|
|
907
907
|
doc.prepend(newDiv);
|
|
908
908
|
|
|
909
909
|
expect(doc.firstChild).toBe(newDiv);
|
|
910
|
-
expect(newDiv.nextSibling).toBe(
|
|
911
|
-
expect(doc.childNodes.length).toBe(
|
|
910
|
+
expect(newDiv.nextSibling).toBe(firstChild);
|
|
911
|
+
expect(doc.childNodes.length).toBe(initialChildCount + 1);
|
|
912
912
|
});
|
|
913
913
|
});
|
|
914
914
|
});
|
|
@@ -1121,16 +1121,15 @@ describe("DOM Manipulation - append", () => {
|
|
|
1121
1121
|
describe("append on document", () => {
|
|
1122
1122
|
it("should append to document", () => {
|
|
1123
1123
|
const doc = parseHTML("<div>Content</div>");
|
|
1124
|
-
const
|
|
1124
|
+
const initialChildCount = doc.childNodes.length;
|
|
1125
1125
|
|
|
1126
1126
|
const newDiv = doc.createElement("div");
|
|
1127
|
-
newDiv.textContent = "
|
|
1127
|
+
newDiv.textContent = "Appended";
|
|
1128
1128
|
|
|
1129
1129
|
doc.append(newDiv);
|
|
1130
1130
|
|
|
1131
1131
|
expect(doc.lastChild).toBe(newDiv);
|
|
1132
|
-
expect(
|
|
1133
|
-
expect(doc.childNodes.length).toBe(2);
|
|
1132
|
+
expect(doc.childNodes.length).toBe(initialChildCount + 1);
|
|
1134
1133
|
});
|
|
1135
1134
|
});
|
|
1136
1135
|
});
|
package/tests/dom.test.ts
CHANGED
|
@@ -10,6 +10,10 @@ import {
|
|
|
10
10
|
} from "../src/dom-simulator";
|
|
11
11
|
import { parse } from "../src/parser";
|
|
12
12
|
|
|
13
|
+
function getBodyContent(doc: any): any {
|
|
14
|
+
return doc.body?.firstChild;
|
|
15
|
+
}
|
|
16
|
+
|
|
13
17
|
describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
|
|
14
18
|
describe("parseHTML basic functionality", () => {
|
|
15
19
|
it("should return a Document object", () => {
|
|
@@ -21,8 +25,7 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
|
|
|
21
25
|
it("should parse simple HTML elements", () => {
|
|
22
26
|
const doc = parseHTML("<p>Hello World</p>");
|
|
23
27
|
|
|
24
|
-
|
|
25
|
-
const paragraph = doc.childNodes[0]!;
|
|
28
|
+
const paragraph = getBodyContent(doc);
|
|
26
29
|
|
|
27
30
|
expect(paragraph.nodeType).toBe(NodeType.ELEMENT_NODE);
|
|
28
31
|
expect(paragraph.nodeName).toBe("P");
|
|
@@ -31,7 +34,7 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
|
|
|
31
34
|
|
|
32
35
|
it("should parse text content correctly", () => {
|
|
33
36
|
const doc = parseHTML("<p>Hello World</p>");
|
|
34
|
-
const paragraph = doc
|
|
37
|
+
const paragraph = getBodyContent(doc);
|
|
35
38
|
|
|
36
39
|
expect(paragraph.childNodes.length).toBe(1);
|
|
37
40
|
const textNode = paragraph.childNodes[0]!;
|
|
@@ -44,7 +47,7 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
|
|
|
44
47
|
it("should parse nested elements", () => {
|
|
45
48
|
const doc = parseHTML("<div><p>Hello</p><span>World</span></div>");
|
|
46
49
|
|
|
47
|
-
const div = doc
|
|
50
|
+
const div = getBodyContent(doc);
|
|
48
51
|
expect(div.nodeName).toBe("DIV");
|
|
49
52
|
expect(div.childNodes.length).toBe(2);
|
|
50
53
|
|
|
@@ -57,17 +60,19 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
|
|
|
57
60
|
|
|
58
61
|
it("should handle attributes correctly", () => {
|
|
59
62
|
const doc = parseHTML('<p id="test" class="highlight">Content</p>');
|
|
60
|
-
const paragraph = doc
|
|
63
|
+
const paragraph = getBodyContent(doc) as any;
|
|
61
64
|
|
|
62
65
|
expect(paragraph.attributes.id).toBe("test");
|
|
63
66
|
expect(paragraph.attributes.class).toBe("highlight");
|
|
64
67
|
});
|
|
65
68
|
|
|
66
69
|
it("should parse comments", () => {
|
|
67
|
-
const doc = parseHTML("
|
|
70
|
+
const doc = parseHTML("<div><!-- This is a comment --></div><p>Hello</p>");
|
|
68
71
|
|
|
69
|
-
|
|
70
|
-
|
|
72
|
+
const body = doc.body;
|
|
73
|
+
expect(body.childNodes.length).toBe(2);
|
|
74
|
+
const div = body.childNodes[0]!;
|
|
75
|
+
const comment = div.childNodes[0]!;
|
|
71
76
|
|
|
72
77
|
expect(comment.nodeType).toBe(NodeType.COMMENT_NODE);
|
|
73
78
|
expect(comment.nodeName).toBe("#comment");
|
|
@@ -77,11 +82,11 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
|
|
|
77
82
|
it("should set parent-child relationships correctly", () => {
|
|
78
83
|
const doc = parseHTML("<div><p>Hello</p></div>");
|
|
79
84
|
|
|
80
|
-
const div = doc
|
|
85
|
+
const div = getBodyContent(doc);
|
|
81
86
|
const p = div.childNodes[0]!;
|
|
82
87
|
|
|
83
88
|
expect(p.parentNode).toBe(<any>div);
|
|
84
|
-
expect(div.parentNode).toBe(doc);
|
|
89
|
+
expect(div.parentNode).toBe(doc.body);
|
|
85
90
|
expect(div.firstChild).toBe(p);
|
|
86
91
|
expect(div.lastChild).toBe(p);
|
|
87
92
|
});
|
|
@@ -91,7 +96,7 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
|
|
|
91
96
|
"<div><p>First</p><span>Second</span><em>Third</em></div>"
|
|
92
97
|
);
|
|
93
98
|
|
|
94
|
-
const div = doc
|
|
99
|
+
const div = getBodyContent(doc);
|
|
95
100
|
const p = div.childNodes[0]!;
|
|
96
101
|
const span = div.childNodes[1]!;
|
|
97
102
|
const em = div.childNodes[2]!;
|
|
@@ -108,7 +113,7 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
|
|
|
108
113
|
it("should handle self-closing elements", () => {
|
|
109
114
|
const doc = parseHTML("<p>Before<br/>After</p>");
|
|
110
115
|
|
|
111
|
-
const p = doc
|
|
116
|
+
const p = getBodyContent(doc);
|
|
112
117
|
expect(p.childNodes.length).toBe(3);
|
|
113
118
|
|
|
114
119
|
const br = p.childNodes[1]!;
|
|
@@ -119,7 +124,7 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
|
|
|
119
124
|
it("should handle empty elements", () => {
|
|
120
125
|
const doc = parseHTML("<div></div>");
|
|
121
126
|
|
|
122
|
-
const div = doc
|
|
127
|
+
const div = getBodyContent(doc);
|
|
123
128
|
expect(div.childNodes.length).toBe(0);
|
|
124
129
|
expect(div.firstChild).toBeNull();
|
|
125
130
|
expect(div.lastChild).toBeNull();
|
|
@@ -154,7 +159,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
|
|
|
154
159
|
describe("getTextContent", () => {
|
|
155
160
|
it("should get text content from a simple text node", () => {
|
|
156
161
|
const doc = parseHTML("<p>Hello World</p>");
|
|
157
|
-
const p = doc
|
|
162
|
+
const p = getBodyContent(doc);
|
|
158
163
|
const textNode = p.childNodes[0]!;
|
|
159
164
|
|
|
160
165
|
expect(getTextContent(textNode)).toBe("Hello World");
|
|
@@ -162,14 +167,14 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
|
|
|
162
167
|
|
|
163
168
|
it("should get text content from an element with text", () => {
|
|
164
169
|
const doc = parseHTML("<p>Hello World</p>");
|
|
165
|
-
const p = doc
|
|
170
|
+
const p = getBodyContent(doc);
|
|
166
171
|
|
|
167
172
|
expect(getTextContent(p)).toBe("Hello World");
|
|
168
173
|
});
|
|
169
174
|
|
|
170
175
|
it("should get concatenated text from nested elements", () => {
|
|
171
176
|
const doc = parseHTML("<div>Hello <span>beautiful</span> world</div>");
|
|
172
|
-
const div = doc
|
|
177
|
+
const div = getBodyContent(doc);
|
|
173
178
|
|
|
174
179
|
expect(getTextContent(div)).toBe("Hello beautiful world");
|
|
175
180
|
});
|
|
@@ -178,28 +183,28 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
|
|
|
178
183
|
const doc = parseHTML(
|
|
179
184
|
"<div>Start <p>Middle <em>Deep <strong>Deeper</strong></em></p> End</div>"
|
|
180
185
|
);
|
|
181
|
-
const div = doc
|
|
186
|
+
const div = getBodyContent(doc);
|
|
182
187
|
|
|
183
188
|
expect(getTextContent(div)).toBe("Start Middle Deep Deeper End");
|
|
184
189
|
});
|
|
185
190
|
|
|
186
191
|
it("should return empty string for elements with no text", () => {
|
|
187
192
|
const doc = parseHTML("<div></div>");
|
|
188
|
-
const div = doc
|
|
193
|
+
const div = getBodyContent(doc);
|
|
189
194
|
|
|
190
195
|
expect(getTextContent(div)).toBe("");
|
|
191
196
|
});
|
|
192
197
|
|
|
193
198
|
it("should ignore comments when getting text content", () => {
|
|
194
199
|
const doc = parseHTML("<div>Before<!-- comment -->After</div>");
|
|
195
|
-
const div = doc
|
|
200
|
+
const div = getBodyContent(doc);
|
|
196
201
|
|
|
197
202
|
expect(getTextContent(div)).toBe("BeforeAfter");
|
|
198
203
|
});
|
|
199
204
|
|
|
200
205
|
it("should handle mixed content with self-closing elements", () => {
|
|
201
206
|
const doc = parseHTML("<p>Before<br/>After</p>");
|
|
202
|
-
const p = doc
|
|
207
|
+
const p = getBodyContent(doc);
|
|
203
208
|
|
|
204
209
|
expect(getTextContent(p)).toBe("BeforeAfter");
|
|
205
210
|
});
|
|
@@ -210,7 +215,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
|
|
|
210
215
|
const doc = parseHTML(
|
|
211
216
|
'<div id="test" class="highlight" data-value="123">Content</div>'
|
|
212
217
|
);
|
|
213
|
-
const div = doc
|
|
218
|
+
const div = getBodyContent(doc) as any;
|
|
214
219
|
|
|
215
220
|
expect(getAttribute(div, "id")).toBe("test");
|
|
216
221
|
expect(getAttribute(div, "class")).toBe("highlight");
|
|
@@ -219,7 +224,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
|
|
|
219
224
|
|
|
220
225
|
it("should return null for non-existing attributes", () => {
|
|
221
226
|
const doc = parseHTML('<div id="test">Content</div>');
|
|
222
|
-
const div = doc
|
|
227
|
+
const div = getBodyContent(doc) as any;
|
|
223
228
|
|
|
224
229
|
expect(getAttribute(div, "nonexistent")).toBeNull();
|
|
225
230
|
expect(getAttribute(div, "class")).toBeNull();
|
|
@@ -227,7 +232,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
|
|
|
227
232
|
|
|
228
233
|
it("should check if attributes exist", () => {
|
|
229
234
|
const doc = parseHTML('<div id="test" class="highlight">Content</div>');
|
|
230
|
-
const div = doc
|
|
235
|
+
const div = getBodyContent(doc) as any;
|
|
231
236
|
|
|
232
237
|
expect(hasAttribute(div, "id")).toBe(true);
|
|
233
238
|
expect(hasAttribute(div, "class")).toBe(true);
|
|
@@ -236,7 +241,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
|
|
|
236
241
|
|
|
237
242
|
it("should set new attributes", () => {
|
|
238
243
|
const doc = parseHTML("<div>Content</div>");
|
|
239
|
-
const div = doc
|
|
244
|
+
const div = getBodyContent(doc) as any;
|
|
240
245
|
|
|
241
246
|
setAttribute(div, "id", "new-id");
|
|
242
247
|
setAttribute(div, "class", "new-class");
|
|
@@ -249,7 +254,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
|
|
|
249
254
|
|
|
250
255
|
it("should update existing attributes", () => {
|
|
251
256
|
const doc = parseHTML('<div id="old-id" class="old-class">Content</div>');
|
|
252
|
-
const div = doc
|
|
257
|
+
const div = getBodyContent(doc) as any;
|
|
253
258
|
|
|
254
259
|
setAttribute(div, "id", "new-id");
|
|
255
260
|
setAttribute(div, "class", "new-class");
|
|
@@ -262,7 +267,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
|
|
|
262
267
|
const doc = parseHTML(
|
|
263
268
|
'<div id="test" class="highlight" data-value="123">Content</div>'
|
|
264
269
|
);
|
|
265
|
-
const div = doc
|
|
270
|
+
const div = getBodyContent(doc) as any;
|
|
266
271
|
|
|
267
272
|
removeAttribute(div, "class");
|
|
268
273
|
removeAttribute(div, "data-value");
|
|
@@ -276,7 +281,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
|
|
|
276
281
|
|
|
277
282
|
it("should handle removing non-existing attributes gracefully", () => {
|
|
278
283
|
const doc = parseHTML('<div id="test">Content</div>');
|
|
279
|
-
const div = doc
|
|
284
|
+
const div = getBodyContent(doc) as any;
|
|
280
285
|
|
|
281
286
|
removeAttribute(div, "nonexistent");
|
|
282
287
|
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import { tokenize } from '../../src/tokenizer';
|
|
2
|
+
import { adaptTokens, type Html5libToken } from './tokenizer-adapter.ts';
|
|
3
|
+
|
|
4
|
+
describe('Tokenizer Adapter Tests', () => {
|
|
5
|
+
it('should adapt simple start tag', () => {
|
|
6
|
+
const tokens = tokenize('<div>');
|
|
7
|
+
const adapted = adaptTokens(tokens);
|
|
8
|
+
expect(adapted).toEqual([
|
|
9
|
+
['StartTag', 'div', {}]
|
|
10
|
+
]);
|
|
11
|
+
});
|
|
12
|
+
|
|
13
|
+
it('should adapt start tag with attributes', () => {
|
|
14
|
+
const tokens = tokenize('<div class="foo" id="bar">');
|
|
15
|
+
const adapted = adaptTokens(tokens);
|
|
16
|
+
expect(adapted).toEqual([
|
|
17
|
+
['StartTag', 'div', { class: 'foo', id: 'bar' }]
|
|
18
|
+
]);
|
|
19
|
+
});
|
|
20
|
+
|
|
21
|
+
it('should adapt self-closing tag', () => {
|
|
22
|
+
const tokens = tokenize('<br/>');
|
|
23
|
+
const adapted = adaptTokens(tokens);
|
|
24
|
+
expect(adapted).toEqual([
|
|
25
|
+
['StartTag', 'br', {}, true]
|
|
26
|
+
]);
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
it('should adapt end tag', () => {
|
|
30
|
+
const tokens = tokenize('</div>');
|
|
31
|
+
const adapted = adaptTokens(tokens);
|
|
32
|
+
expect(adapted).toEqual([
|
|
33
|
+
['EndTag', 'div']
|
|
34
|
+
]);
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
it('should adapt text', () => {
|
|
38
|
+
const tokens = tokenize('hello world');
|
|
39
|
+
const adapted = adaptTokens(tokens);
|
|
40
|
+
expect(adapted).toEqual([
|
|
41
|
+
['Character', 'hello world']
|
|
42
|
+
]);
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
it('should adapt comment', () => {
|
|
46
|
+
const tokens = tokenize('<!-- comment -->');
|
|
47
|
+
const adapted = adaptTokens(tokens);
|
|
48
|
+
expect(adapted).toEqual([
|
|
49
|
+
['Comment', ' comment ']
|
|
50
|
+
]);
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
it('should adapt DOCTYPE', () => {
|
|
54
|
+
const tokens = tokenize('<!DOCTYPE html>');
|
|
55
|
+
const adapted = adaptTokens(tokens);
|
|
56
|
+
expect(adapted).toEqual([
|
|
57
|
+
['DOCTYPE', 'html', null, null, true]
|
|
58
|
+
]);
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
it('should adapt mixed content', () => {
|
|
62
|
+
const tokens = tokenize('<div>hello</div>');
|
|
63
|
+
const adapted = adaptTokens(tokens);
|
|
64
|
+
expect(adapted).toEqual([
|
|
65
|
+
['StartTag', 'div', {}],
|
|
66
|
+
['Character', 'hello'],
|
|
67
|
+
['EndTag', 'div']
|
|
68
|
+
]);
|
|
69
|
+
});
|
|
70
|
+
});
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
// tests/helpers/tokenizer-adapter.ts
|
|
2
|
+
|
|
3
|
+
import type { Token } from '../../src/tokenizer';
|
|
4
|
+
|
|
5
|
+
export type Html5libToken =
|
|
6
|
+
| ['StartTag', string, Record<string, string>]
|
|
7
|
+
| ['StartTag', string, Record<string, string>, boolean] // con self-closing flag
|
|
8
|
+
| ['EndTag', string]
|
|
9
|
+
| ['Character', string]
|
|
10
|
+
| ['Comment', string]
|
|
11
|
+
| ['DOCTYPE', string, string | null, string | null, boolean];
|
|
12
|
+
|
|
13
|
+
export function adaptTokens(tokens: Token[]): Html5libToken[] {
|
|
14
|
+
const result: Html5libToken[] = [];
|
|
15
|
+
|
|
16
|
+
for (const token of tokens) {
|
|
17
|
+
if (token.type === 'EOF') continue;
|
|
18
|
+
|
|
19
|
+
switch (token.type) {
|
|
20
|
+
case 'TAG_OPEN':
|
|
21
|
+
if (token.isClosing) {
|
|
22
|
+
result.push(['EndTag', token.value]);
|
|
23
|
+
} else {
|
|
24
|
+
const attrs = token.attributes || {};
|
|
25
|
+
if (token.isSelfClosing) {
|
|
26
|
+
result.push(['StartTag', token.value, attrs, true]);
|
|
27
|
+
} else {
|
|
28
|
+
result.push(['StartTag', token.value, attrs]);
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
break;
|
|
32
|
+
|
|
33
|
+
case 'TAG_CLOSE':
|
|
34
|
+
result.push(['EndTag', token.value]);
|
|
35
|
+
break;
|
|
36
|
+
|
|
37
|
+
case 'TEXT':
|
|
38
|
+
result.push(['Character', token.value]);
|
|
39
|
+
break;
|
|
40
|
+
|
|
41
|
+
case 'COMMENT':
|
|
42
|
+
result.push(['Comment', token.value]);
|
|
43
|
+
break;
|
|
44
|
+
|
|
45
|
+
case 'DOCTYPE':
|
|
46
|
+
// Parsear DOCTYPE para extraer name, publicId, systemId
|
|
47
|
+
result.push(['DOCTYPE', token.value, null, null, true]);
|
|
48
|
+
break;
|
|
49
|
+
|
|
50
|
+
case 'CDATA':
|
|
51
|
+
result.push(['Character', token.value]);
|
|
52
|
+
break;
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
return result;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
// Función para comparar tokens, manejando casos especiales
|
|
60
|
+
export function compareTokens(actual: Html5libToken[], expected: any[]): boolean {
|
|
61
|
+
// Implementar comparación flexible
|
|
62
|
+
// - Coalescer Characters consecutivos
|
|
63
|
+
// - Ignorar diferencias de whitespace en algunos casos
|
|
64
|
+
return JSON.stringify(actual) === JSON.stringify(expected);
|
|
65
|
+
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { parseHTML } from '../../index.ts';
|
|
2
|
+
import { serializeToHtml5lib } from './tree-adapter.ts';
|
|
3
|
+
|
|
4
|
+
describe('Tree Adapter Tests', () => {
|
|
5
|
+
it('should serialize simple element', () => {
|
|
6
|
+
const doc = parseHTML('<div></div>');
|
|
7
|
+
const serialized = serializeToHtml5lib(doc);
|
|
8
|
+
expect(serialized).toContain('| <html>');
|
|
9
|
+
expect(serialized).toContain('| <body>');
|
|
10
|
+
expect(serialized).toContain('| <div>');
|
|
11
|
+
});
|
|
12
|
+
|
|
13
|
+
it('should serialize element with attributes', () => {
|
|
14
|
+
const doc = parseHTML('<div class="foo" id="bar"></div>');
|
|
15
|
+
const serialized = serializeToHtml5lib(doc);
|
|
16
|
+
expect(serialized).toContain('<div>');
|
|
17
|
+
expect(serialized).toContain('class="foo"');
|
|
18
|
+
expect(serialized).toContain('id="bar"');
|
|
19
|
+
});
|
|
20
|
+
|
|
21
|
+
it('should serialize text content', () => {
|
|
22
|
+
const doc = parseHTML('<div>hello</div>');
|
|
23
|
+
const serialized = serializeToHtml5lib(doc);
|
|
24
|
+
expect(serialized).toContain('"hello"');
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
it('should serialize comment', () => {
|
|
28
|
+
const doc = parseHTML('<div><!-- comment --></div>');
|
|
29
|
+
const serialized = serializeToHtml5lib(doc);
|
|
30
|
+
expect(serialized).toContain('<!-- comment -->');
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
it('should serialize DOCTYPE', () => {
|
|
34
|
+
const doc = parseHTML('<!DOCTYPE html><div></div>');
|
|
35
|
+
const serialized = serializeToHtml5lib(doc);
|
|
36
|
+
expect(serialized).toContain('<!DOCTYPE html>');
|
|
37
|
+
expect(serialized).toContain('<div>');
|
|
38
|
+
});
|
|
39
|
+
});
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
// tests/helpers/tree-adapter.ts
|
|
2
|
+
|
|
3
|
+
export interface SerializeOptions {
|
|
4
|
+
skipImplicitDoctype?: boolean;
|
|
5
|
+
}
|
|
6
|
+
|
|
7
|
+
export function serializeToHtml5lib(doc: any, options: SerializeOptions = {}): string {
|
|
8
|
+
const lines: string[] = [];
|
|
9
|
+
|
|
10
|
+
function serialize(node: any, depth: number): void {
|
|
11
|
+
const indent = '| ' + ' '.repeat(depth);
|
|
12
|
+
|
|
13
|
+
if (node.nodeType === 9) { // DOCUMENT
|
|
14
|
+
for (const child of node.childNodes || []) {
|
|
15
|
+
serialize(child, depth);
|
|
16
|
+
}
|
|
17
|
+
} else if (node.nodeType === 1) { // ELEMENT
|
|
18
|
+
const tagName = node.tagName.toLowerCase();
|
|
19
|
+
const ns = node.namespaceURI;
|
|
20
|
+
|
|
21
|
+
let nsPrefix = '';
|
|
22
|
+
if (ns === 'http://www.w3.org/2000/svg') {
|
|
23
|
+
nsPrefix = ' svg';
|
|
24
|
+
} else if (ns === 'http://www.w3.org/1998/Math/MathML') {
|
|
25
|
+
nsPrefix = ' math';
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
lines.push(`${indent}<${tagName}${nsPrefix}>`);
|
|
29
|
+
|
|
30
|
+
// Atributos en orden alfabético
|
|
31
|
+
const attrs = Object.entries(node.attributes || {}).sort(([a], [b]) => a.localeCompare(b));
|
|
32
|
+
for (const [name, value] of attrs) {
|
|
33
|
+
lines.push(`${indent} ${name}="${value}"`);
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
// Template special case
|
|
37
|
+
if (node.tagName.toLowerCase() === 'template' && node.content) {
|
|
38
|
+
lines.push(`${indent} content`);
|
|
39
|
+
serialize(node.content, depth + 2);
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
// Children
|
|
43
|
+
for (const child of node.childNodes || []) {
|
|
44
|
+
serialize(child, depth + 1);
|
|
45
|
+
}
|
|
46
|
+
} else if (node.nodeType === 3) { // TEXT
|
|
47
|
+
lines.push(`${indent}"${node.textContent}"`);
|
|
48
|
+
} else if (node.nodeType === 8) { // COMMENT
|
|
49
|
+
const commentData = node.data || node.nodeValue || node.textContent || '';
|
|
50
|
+
lines.push(`${indent}<!-- ${commentData} -->`);
|
|
51
|
+
} else if (node.nodeType === 10) { // DOCTYPE
|
|
52
|
+
if (!options.skipImplicitDoctype) {
|
|
53
|
+
lines.push(`${indent}<!DOCTYPE ${node.name || 'html'}>`);
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
serialize(doc, 0);
|
|
59
|
+
return lines.join('\n') + '\n';
|
|
60
|
+
}
|