@tkeron/html-parser 1.1.2 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/npm_deploy.yml +14 -4
- package/README.md +6 -6
- package/bun.lock +6 -8
- package/check-versions.ts +147 -0
- package/index.ts +4 -8
- package/package.json +5 -6
- package/src/dom-simulator/append-child.ts +130 -0
- package/src/dom-simulator/append.ts +18 -0
- package/src/dom-simulator/attributes.ts +23 -0
- package/src/dom-simulator/clone-node.ts +51 -0
- package/src/dom-simulator/convert-ast-node-to-dom.ts +37 -0
- package/src/dom-simulator/create-cdata.ts +18 -0
- package/src/dom-simulator/create-comment.ts +23 -0
- package/src/dom-simulator/create-doctype.ts +24 -0
- package/src/dom-simulator/create-document.ts +81 -0
- package/src/dom-simulator/create-element.ts +195 -0
- package/src/dom-simulator/create-processing-instruction.ts +19 -0
- package/src/dom-simulator/create-temp-parent.ts +9 -0
- package/src/dom-simulator/create-text-node.ts +23 -0
- package/src/dom-simulator/escape-text-content.ts +6 -0
- package/src/dom-simulator/find-special-elements.ts +14 -0
- package/src/dom-simulator/get-text-content.ts +18 -0
- package/src/dom-simulator/index.ts +36 -0
- package/src/dom-simulator/inner-outer-html.ts +182 -0
- package/src/dom-simulator/insert-after.ts +20 -0
- package/src/dom-simulator/insert-before.ts +108 -0
- package/src/dom-simulator/matches.ts +26 -0
- package/src/dom-simulator/node-types.ts +26 -0
- package/src/dom-simulator/prepend.ts +24 -0
- package/src/dom-simulator/remove-child.ts +68 -0
- package/src/dom-simulator/remove.ts +7 -0
- package/src/dom-simulator/replace-child.ts +152 -0
- package/src/dom-simulator/set-text-content.ts +33 -0
- package/src/dom-simulator/update-element-content.ts +56 -0
- package/src/dom-simulator.ts +12 -1126
- package/src/encoding/constants.ts +8 -0
- package/src/encoding/detect-encoding.ts +21 -0
- package/src/encoding/index.ts +1 -0
- package/src/encoding/normalize-encoding.ts +6 -0
- package/src/html-entities.ts +2127 -0
- package/src/index.ts +5 -5
- package/src/parser/adoption-agency-helpers.ts +145 -0
- package/src/parser/constants.ts +137 -0
- package/src/parser/dom-to-ast.ts +79 -0
- package/src/parser/index.ts +9 -0
- package/src/parser/parse.ts +772 -0
- package/src/parser/types.ts +56 -0
- package/src/selectors/find-elements-descendant.ts +47 -0
- package/src/selectors/index.ts +2 -0
- package/src/selectors/matches-selector.ts +12 -0
- package/src/selectors/matches-token.ts +27 -0
- package/src/selectors/parse-selector.ts +48 -0
- package/src/selectors/query-selector-all.ts +43 -0
- package/src/selectors/query-selector.ts +6 -0
- package/src/selectors/types.ts +10 -0
- package/src/serializer/attributes.ts +74 -0
- package/src/serializer/escape.ts +13 -0
- package/src/serializer/index.ts +1 -0
- package/src/serializer/serialize-tokens.ts +511 -0
- package/src/tokenizer/calculate-position.ts +10 -0
- package/src/tokenizer/constants.ts +11 -0
- package/src/tokenizer/decode-entities.ts +64 -0
- package/src/tokenizer/index.ts +2 -0
- package/src/tokenizer/parse-attributes.ts +74 -0
- package/src/tokenizer/tokenize.ts +165 -0
- package/src/tokenizer/types.ts +25 -0
- package/tests/adoption-agency-helpers.test.ts +304 -0
- package/tests/advanced.test.ts +242 -221
- package/tests/cloneNode.test.ts +19 -66
- package/tests/custom-elements-head.test.ts +54 -55
- package/tests/dom-extended.test.ts +77 -64
- package/tests/dom-manipulation.test.ts +51 -24
- package/tests/dom.test.ts +15 -13
- package/tests/encoding/detect-encoding.test.ts +33 -0
- package/tests/google-dom.test.ts +2 -2
- package/tests/helpers/tokenizer-adapter.test.ts +29 -43
- package/tests/helpers/tokenizer-adapter.ts +36 -33
- package/tests/helpers/tree-adapter.test.ts +20 -20
- package/tests/helpers/tree-adapter.ts +34 -24
- package/tests/html-entities-text.test.ts +6 -2
- package/tests/innerhtml-void-elements.test.ts +52 -36
- package/tests/outerHTML-replacement.test.ts +37 -65
- package/tests/parser/dom-to-ast.test.ts +109 -0
- package/tests/parser/parse.test.ts +139 -0
- package/tests/parser.test.ts +281 -217
- package/tests/selectors/query-selector-all.test.ts +39 -0
- package/tests/selectors/query-selector.test.ts +42 -0
- package/tests/serializer/attributes.test.ts +132 -0
- package/tests/serializer/escape.test.ts +51 -0
- package/tests/serializer/serialize-tokens.test.ts +80 -0
- package/tests/serializer-core.test.ts +6 -6
- package/tests/serializer-injectmeta.test.ts +6 -6
- package/tests/serializer-optionaltags.test.ts +9 -6
- package/tests/serializer-options.test.ts +6 -6
- package/tests/serializer-whitespace.test.ts +6 -6
- package/tests/tokenizer/calculate-position.test.ts +34 -0
- package/tests/tokenizer/decode-entities.test.ts +31 -0
- package/tests/tokenizer/parse-attributes.test.ts +44 -0
- package/tests/tokenizer/tokenize.test.ts +757 -0
- package/tests/tokenizer-namedEntities.test.ts +10 -7
- package/tests/tokenizer-pendingSpecChanges.test.ts +10 -7
- package/tests/tokenizer.test.ts +268 -256
- package/tests/tree-construction-adoption01.test.ts +25 -16
- package/tests/tree-construction-adoption02.test.ts +30 -19
- package/tests/tree-construction-domjs-unsafe.test.ts +6 -4
- package/tests/tree-construction-entities02.test.ts +18 -16
- package/tests/tree-construction-html5test-com.test.ts +16 -10
- package/tests/tree-construction-math.test.ts +11 -9
- package/tests/tree-construction-namespace-sensitivity.test.ts +11 -9
- package/tests/tree-construction-noscript01.test.ts +11 -9
- package/tests/tree-construction-ruby.test.ts +6 -4
- package/tests/tree-construction-scriptdata01.test.ts +6 -4
- package/tests/tree-construction-svg.test.ts +6 -4
- package/tests/tree-construction-template.test.ts +6 -4
- package/tests/tree-construction-tests10.test.ts +6 -4
- package/tests/tree-construction-tests11.test.ts +6 -4
- package/tests/tree-construction-tests20.test.ts +7 -4
- package/tests/tree-construction-tests21.test.ts +7 -4
- package/tests/tree-construction-tests23.test.ts +7 -4
- package/tests/tree-construction-tests24.test.ts +7 -4
- package/tests/tree-construction-tests5.test.ts +6 -5
- package/tests/tree-construction-tests6.test.ts +6 -5
- package/tests/tree-construction-tests_innerHTML_1.test.ts +6 -5
- package/tests/void-elements.test.ts +85 -40
- package/tsconfig.json +1 -1
- package/src/css-selector.ts +0 -185
- package/src/encoding.ts +0 -39
- package/src/parser.ts +0 -682
- package/src/serializer.ts +0 -450
- package/src/tokenizer.ts +0 -325
- package/tests/selectors.test.ts +0 -128
|
@@ -1,34 +1,36 @@
|
|
|
1
|
-
import { describe, it, expect } from
|
|
2
|
-
import { parseHTML } from
|
|
3
|
-
import {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
it('should generate correct innerHTML for simple elements', () => {
|
|
10
|
-
const doc = parseHTML('<div>Hello World</div>') as Document;
|
|
1
|
+
import { describe, it, expect } from "bun:test";
|
|
2
|
+
import { parseHTML } from "../index";
|
|
3
|
+
import { setInnerHTML } from "../src/dom-simulator/index.js";
|
|
4
|
+
|
|
5
|
+
describe("DOM Extended Functionality", () => {
|
|
6
|
+
describe("innerHTML and outerHTML", () => {
|
|
7
|
+
it("should generate correct innerHTML for simple elements", () => {
|
|
8
|
+
const doc = parseHTML("<div>Hello World</div>") as Document;
|
|
11
9
|
const div = doc.body?.firstChild as HTMLElement;
|
|
12
10
|
|
|
13
|
-
expect(div.innerHTML).toBe(
|
|
11
|
+
expect(div.innerHTML).toBe("Hello World");
|
|
14
12
|
});
|
|
15
13
|
|
|
16
|
-
it(
|
|
17
|
-
const doc = parseHTML(
|
|
14
|
+
it("should generate correct innerHTML for nested elements", () => {
|
|
15
|
+
const doc = parseHTML(
|
|
16
|
+
"<div><p>Hello</p><span>World</span></div>",
|
|
17
|
+
) as Document;
|
|
18
18
|
const div = doc.body?.firstChild as HTMLElement;
|
|
19
19
|
|
|
20
|
-
expect(div.innerHTML).toBe(
|
|
20
|
+
expect(div.innerHTML).toBe("<p>Hello</p><span>World</span>");
|
|
21
21
|
});
|
|
22
22
|
|
|
23
|
-
it(
|
|
23
|
+
it("should generate correct outerHTML for elements", () => {
|
|
24
24
|
const doc = parseHTML('<div class="test">Hello</div>') as Document;
|
|
25
25
|
const div = doc.body?.firstChild as HTMLElement;
|
|
26
26
|
|
|
27
27
|
expect(div.outerHTML).toBe('<div class="test">Hello</div>');
|
|
28
28
|
});
|
|
29
29
|
|
|
30
|
-
it(
|
|
31
|
-
const doc = parseHTML(
|
|
30
|
+
it("should generate correct outerHTML for elements with multiple attributes", () => {
|
|
31
|
+
const doc = parseHTML(
|
|
32
|
+
'<input type="text" name="username" value="test">',
|
|
33
|
+
) as Document;
|
|
32
34
|
const input = doc.body?.firstChild as HTMLElement;
|
|
33
35
|
|
|
34
36
|
expect(input.outerHTML).toContain('type="text"');
|
|
@@ -36,40 +38,44 @@ describe('DOM Extended Functionality', () => {
|
|
|
36
38
|
expect(input.outerHTML).toContain('value="test"');
|
|
37
39
|
});
|
|
38
40
|
|
|
39
|
-
it(
|
|
40
|
-
const doc = parseHTML(
|
|
41
|
+
it("should handle comments in innerHTML", () => {
|
|
42
|
+
const doc = parseHTML("<div><!-- comment -->text</div>") as Document;
|
|
41
43
|
const div = doc.body?.firstChild as HTMLElement;
|
|
42
44
|
|
|
43
|
-
expect(div.innerHTML).toBe(
|
|
45
|
+
expect(div.innerHTML).toBe("<!-- comment -->text");
|
|
44
46
|
});
|
|
45
47
|
});
|
|
46
48
|
|
|
47
|
-
describe(
|
|
48
|
-
it(
|
|
49
|
-
const doc = parseHTML(
|
|
49
|
+
describe("textContent property", () => {
|
|
50
|
+
it("should provide textContent on elements", () => {
|
|
51
|
+
const doc = parseHTML("<div>Hello <span>World</span></div>") as Document;
|
|
50
52
|
const div = doc.body?.firstChild as HTMLElement;
|
|
51
53
|
|
|
52
|
-
expect(div.textContent).toBe(
|
|
54
|
+
expect(div.textContent).toBe("Hello World");
|
|
53
55
|
});
|
|
54
56
|
|
|
55
|
-
it(
|
|
56
|
-
const doc = parseHTML(
|
|
57
|
+
it("should provide textContent for deeply nested elements", () => {
|
|
58
|
+
const doc = parseHTML(
|
|
59
|
+
"<div><p><em>Hello</em> <strong>Beautiful</strong></p> <span>World</span></div>",
|
|
60
|
+
) as Document;
|
|
57
61
|
const div = doc.body?.firstChild as HTMLElement;
|
|
58
62
|
|
|
59
|
-
expect(div.textContent).toBe(
|
|
63
|
+
expect(div.textContent).toBe("Hello Beautiful World");
|
|
60
64
|
});
|
|
61
65
|
|
|
62
|
-
it(
|
|
63
|
-
const doc = parseHTML(
|
|
66
|
+
it("should ignore comments in textContent", () => {
|
|
67
|
+
const doc = parseHTML(
|
|
68
|
+
"<div>Hello <!-- comment --> World</div>",
|
|
69
|
+
) as Document;
|
|
64
70
|
const div = doc.body?.firstChild as HTMLElement;
|
|
65
71
|
|
|
66
|
-
expect(div.textContent).toBe(
|
|
72
|
+
expect(div.textContent).toBe("Hello World");
|
|
67
73
|
});
|
|
68
74
|
});
|
|
69
75
|
|
|
70
|
-
describe(
|
|
71
|
-
it(
|
|
72
|
-
const doc = parseHTML(
|
|
76
|
+
describe("element navigation properties", () => {
|
|
77
|
+
it("should provide parentElement property", () => {
|
|
78
|
+
const doc = parseHTML("<div><p>Hello</p></div>") as Document;
|
|
73
79
|
const div = doc.body?.firstChild as HTMLElement;
|
|
74
80
|
const p = div.children[0];
|
|
75
81
|
|
|
@@ -77,16 +83,20 @@ describe('DOM Extended Functionality', () => {
|
|
|
77
83
|
expect(p?.parentElement).toBe(div);
|
|
78
84
|
});
|
|
79
85
|
|
|
80
|
-
it(
|
|
81
|
-
const doc = parseHTML(
|
|
86
|
+
it("should provide firstElementChild and lastElementChild", () => {
|
|
87
|
+
const doc = parseHTML(
|
|
88
|
+
"<div><span>First</span><p>Second</p><em>Last</em></div>",
|
|
89
|
+
) as Document;
|
|
82
90
|
const div = doc.body?.firstChild as HTMLElement;
|
|
83
91
|
|
|
84
|
-
expect(div.firstElementChild?.tagName).toBe(
|
|
85
|
-
expect(div.lastElementChild?.tagName).toBe(
|
|
92
|
+
expect(div.firstElementChild?.tagName).toBe("SPAN");
|
|
93
|
+
expect(div.lastElementChild?.tagName).toBe("EM");
|
|
86
94
|
});
|
|
87
95
|
|
|
88
|
-
it(
|
|
89
|
-
const doc = parseHTML(
|
|
96
|
+
it("should provide nextElementSibling and previousElementSibling", () => {
|
|
97
|
+
const doc = parseHTML(
|
|
98
|
+
"<div><span>First</span><p>Second</p><em>Last</em></div>",
|
|
99
|
+
) as Document;
|
|
90
100
|
const div = doc.body?.firstChild as HTMLElement;
|
|
91
101
|
const span = div.children[0];
|
|
92
102
|
const p = div.children[1];
|
|
@@ -108,51 +118,57 @@ describe('DOM Extended Functionality', () => {
|
|
|
108
118
|
});
|
|
109
119
|
});
|
|
110
120
|
|
|
111
|
-
describe(
|
|
112
|
-
it(
|
|
113
|
-
const doc = parseHTML(
|
|
121
|
+
describe("setInnerHTML functionality", () => {
|
|
122
|
+
it("should clear existing content when setting innerHTML", () => {
|
|
123
|
+
const doc = parseHTML("<div><p>Old content</p></div>") as Document;
|
|
114
124
|
const div = doc.body?.firstChild as HTMLElement;
|
|
115
125
|
|
|
116
|
-
setInnerHTML(div,
|
|
126
|
+
setInnerHTML(div, "New content");
|
|
117
127
|
|
|
118
|
-
expect(div.innerHTML).toBe(
|
|
128
|
+
expect(div.innerHTML).toBe("New content");
|
|
119
129
|
expect(div.children.length).toBe(0);
|
|
120
130
|
expect(div.childNodes.length).toBe(1);
|
|
121
131
|
expect(div.childNodes[0]?.nodeType).toBe(3);
|
|
122
|
-
expect(div.childNodes[0]?.textContent).toBe(
|
|
132
|
+
expect(div.childNodes[0]?.textContent).toBe("New content");
|
|
123
133
|
});
|
|
124
134
|
});
|
|
125
135
|
|
|
126
|
-
describe(
|
|
127
|
-
it(
|
|
128
|
-
const doc = parseHTML(
|
|
136
|
+
describe("Document body property type validation", () => {
|
|
137
|
+
it("should have body property with HTMLElement type", () => {
|
|
138
|
+
const doc = parseHTML(
|
|
139
|
+
"<html><body><p>Content</p></body></html>",
|
|
140
|
+
) as Document;
|
|
129
141
|
|
|
130
142
|
expect(doc.body).toBeTruthy();
|
|
131
|
-
expect(doc.body?.tagName).toBe(
|
|
132
|
-
expect(doc.body?.innerHTML).toBe(
|
|
133
|
-
expect(doc.body?.textContent).toBe(
|
|
143
|
+
expect(doc.body?.tagName).toBe("BODY");
|
|
144
|
+
expect(doc.body?.innerHTML).toBe("<p>Content</p>");
|
|
145
|
+
expect(doc.body?.textContent).toBe("Content");
|
|
134
146
|
});
|
|
135
147
|
|
|
136
|
-
it(
|
|
137
|
-
const doc = parseHTML(
|
|
148
|
+
it("should have head property with HTMLElement type", () => {
|
|
149
|
+
const doc = parseHTML(
|
|
150
|
+
"<html><head><title>Test</title></head><body></body></html>",
|
|
151
|
+
) as Document;
|
|
138
152
|
|
|
139
153
|
expect(doc.head).toBeTruthy();
|
|
140
|
-
expect(doc.head?.tagName).toBe(
|
|
141
|
-
expect(doc.head?.innerHTML).toBe(
|
|
154
|
+
expect(doc.head?.tagName).toBe("HEAD");
|
|
155
|
+
expect(doc.head?.innerHTML).toBe("<title>Test</title>");
|
|
142
156
|
});
|
|
143
157
|
|
|
144
|
-
it(
|
|
145
|
-
const doc = parseHTML(
|
|
158
|
+
it("should have documentElement property with HTMLElement type", () => {
|
|
159
|
+
const doc = parseHTML(
|
|
160
|
+
"<html><head></head><body></body></html>",
|
|
161
|
+
) as Document;
|
|
146
162
|
|
|
147
163
|
expect(doc.documentElement).toBeTruthy();
|
|
148
|
-
expect(doc.documentElement?.tagName).toBe(
|
|
164
|
+
expect(doc.documentElement?.tagName).toBe("HTML");
|
|
149
165
|
expect(doc.documentElement?.children.length).toBe(2);
|
|
150
166
|
});
|
|
151
167
|
});
|
|
152
168
|
|
|
153
|
-
describe(
|
|
169
|
+
describe("DOM mutation and manipulation", () => {
|
|
154
170
|
it("should append an element and update innerHTML accordingly", () => {
|
|
155
|
-
const doc = parseHTML(
|
|
171
|
+
const doc = parseHTML("<html><head></head><body></body></html>");
|
|
156
172
|
|
|
157
173
|
const body = doc.querySelector("body");
|
|
158
174
|
|
|
@@ -164,10 +180,7 @@ describe('DOM Extended Functionality', () => {
|
|
|
164
180
|
|
|
165
181
|
const innerHTML = body?.innerHTML;
|
|
166
182
|
|
|
167
|
-
expect(innerHTML).toBe(
|
|
168
|
-
|
|
183
|
+
expect(innerHTML).toBe("<h1>Hello World</h1>");
|
|
169
184
|
});
|
|
170
185
|
});
|
|
171
|
-
|
|
172
|
-
|
|
173
186
|
});
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { describe, it, expect } from "bun:test";
|
|
2
2
|
import { parseHTML } from "../index";
|
|
3
|
-
import { NodeType } from "../src/dom-simulator";
|
|
3
|
+
import { NodeType } from "../src/dom-simulator/index.js";
|
|
4
4
|
|
|
5
5
|
describe("DOM Manipulation - insertBefore", () => {
|
|
6
6
|
describe("Basic insertBefore functionality", () => {
|
|
@@ -255,7 +255,7 @@ describe("DOM Manipulation - insertBefore", () => {
|
|
|
255
255
|
describe("insertBefore with node relocation", () => {
|
|
256
256
|
it("should remove node from previous parent when inserting", () => {
|
|
257
257
|
const doc = parseHTML(
|
|
258
|
-
"<div id='parent1'><span>Child</span></div><div id='parent2'></div>"
|
|
258
|
+
"<div id='parent1'><span>Child</span></div><div id='parent2'></div>",
|
|
259
259
|
);
|
|
260
260
|
const parent1 = doc.querySelector("#parent1");
|
|
261
261
|
const parent2 = doc.querySelector("#parent2");
|
|
@@ -270,7 +270,7 @@ describe("DOM Manipulation - insertBefore", () => {
|
|
|
270
270
|
|
|
271
271
|
it("should update all relationships when moving node between parents", () => {
|
|
272
272
|
const doc = parseHTML(
|
|
273
|
-
"<div id='p1'><span>A</span><span>B</span></div><div id='p2'><span>C</span></div>"
|
|
273
|
+
"<div id='p1'><span>A</span><span>B</span></div><div id='p2'><span>C</span></div>",
|
|
274
274
|
);
|
|
275
275
|
const parent1 = doc.querySelector("#p1");
|
|
276
276
|
const parent2 = doc.querySelector("#p2");
|
|
@@ -319,7 +319,7 @@ describe("DOM Manipulation - replaceChild", () => {
|
|
|
319
319
|
|
|
320
320
|
it("should replace middle child in multiple children", () => {
|
|
321
321
|
const doc = parseHTML(
|
|
322
|
-
"<div><span>First</span><span>Old</span><span>Third</span></div>"
|
|
322
|
+
"<div><span>First</span><span>Old</span><span>Third</span></div>",
|
|
323
323
|
);
|
|
324
324
|
const div = doc.querySelector("div");
|
|
325
325
|
const oldSpan = div.childNodes[1];
|
|
@@ -383,7 +383,7 @@ describe("DOM Manipulation - replaceChild", () => {
|
|
|
383
383
|
describe("replaceChild sibling relationships", () => {
|
|
384
384
|
it("should transfer sibling relationships to new node", () => {
|
|
385
385
|
const doc = parseHTML(
|
|
386
|
-
"<div><span>A</span><span>Old</span><span>C</span></div>"
|
|
386
|
+
"<div><span>A</span><span>Old</span><span>C</span></div>",
|
|
387
387
|
);
|
|
388
388
|
const div = doc.querySelector("div");
|
|
389
389
|
const spanA = div.childNodes[0];
|
|
@@ -430,7 +430,9 @@ describe("DOM Manipulation - replaceChild", () => {
|
|
|
430
430
|
});
|
|
431
431
|
|
|
432
432
|
it("should clear old child's relationships", () => {
|
|
433
|
-
const doc = parseHTML(
|
|
433
|
+
const doc = parseHTML(
|
|
434
|
+
"<div><span>A</span><span>Old</span><span>C</span></div>",
|
|
435
|
+
);
|
|
434
436
|
const div = doc.querySelector("div");
|
|
435
437
|
const oldSpan = div.childNodes[1];
|
|
436
438
|
|
|
@@ -447,7 +449,7 @@ describe("DOM Manipulation - replaceChild", () => {
|
|
|
447
449
|
describe("replaceChild element-specific relationships", () => {
|
|
448
450
|
it("should update children array when replacing element with element", () => {
|
|
449
451
|
const doc = parseHTML(
|
|
450
|
-
"<div><span>A</span><span>Old</span><span>C</span></div>"
|
|
452
|
+
"<div><span>A</span><span>Old</span><span>C</span></div>",
|
|
451
453
|
);
|
|
452
454
|
const div = doc.querySelector("div");
|
|
453
455
|
const oldSpan = div.children[1];
|
|
@@ -464,7 +466,7 @@ describe("DOM Manipulation - replaceChild", () => {
|
|
|
464
466
|
|
|
465
467
|
it("should update element sibling relationships", () => {
|
|
466
468
|
const doc = parseHTML(
|
|
467
|
-
"<div><span>A</span><span>Old</span><span>C</span></div>"
|
|
469
|
+
"<div><span>A</span><span>Old</span><span>C</span></div>",
|
|
468
470
|
);
|
|
469
471
|
const div = doc.querySelector("div");
|
|
470
472
|
const spanA = div.children[0];
|
|
@@ -504,7 +506,7 @@ describe("DOM Manipulation - replaceChild", () => {
|
|
|
504
506
|
|
|
505
507
|
it("should remove from children array when replacing element with text", () => {
|
|
506
508
|
const doc = parseHTML(
|
|
507
|
-
"<div><span>A</span><span>Old</span><span>C</span></div>"
|
|
509
|
+
"<div><span>A</span><span>Old</span><span>C</span></div>",
|
|
508
510
|
);
|
|
509
511
|
const div = doc.querySelector("div");
|
|
510
512
|
const spanA = div.children[0];
|
|
@@ -582,7 +584,7 @@ describe("DOM Manipulation - replaceChild", () => {
|
|
|
582
584
|
describe("replaceChild with node relocation", () => {
|
|
583
585
|
it("should remove node from previous parent before replacing", () => {
|
|
584
586
|
const doc = parseHTML(
|
|
585
|
-
"<div id='p1'><span>Moving</span></div><div id='p2'><span>Old</span></div>"
|
|
587
|
+
"<div id='p1'><span>Moving</span></div><div id='p2'><span>Old</span></div>",
|
|
586
588
|
);
|
|
587
589
|
const parent1 = doc.querySelector("#p1");
|
|
588
590
|
const parent2 = doc.querySelector("#p2");
|
|
@@ -830,7 +832,9 @@ describe("DOM Manipulation - prepend", () => {
|
|
|
830
832
|
|
|
831
833
|
describe("prepend with parent relocation", () => {
|
|
832
834
|
it("should move node from another parent when prepending", () => {
|
|
833
|
-
const doc = parseHTML(
|
|
835
|
+
const doc = parseHTML(
|
|
836
|
+
"<div id='a'><span>Child</span></div><div id='b'><span>Other</span></div>",
|
|
837
|
+
);
|
|
834
838
|
const divA = doc.querySelector("#a");
|
|
835
839
|
const divB = doc.querySelector("#b");
|
|
836
840
|
const child = divA.querySelector("span");
|
|
@@ -844,7 +848,9 @@ describe("DOM Manipulation - prepend", () => {
|
|
|
844
848
|
});
|
|
845
849
|
|
|
846
850
|
it("should remove from old parent before prepending", () => {
|
|
847
|
-
const doc = parseHTML(
|
|
851
|
+
const doc = parseHTML(
|
|
852
|
+
"<div id='a'><span id='1'>1</span><span id='2'>2</span></div><div id='b'></div>",
|
|
853
|
+
);
|
|
848
854
|
const divA = doc.querySelector("#a");
|
|
849
855
|
const divB = doc.querySelector("#b");
|
|
850
856
|
const span1 = doc.querySelector("#1");
|
|
@@ -1005,7 +1011,6 @@ describe("DOM Manipulation - append", () => {
|
|
|
1005
1011
|
it("should append text node after elements", () => {
|
|
1006
1012
|
const doc = parseHTML("<div><span>Element</span></div>");
|
|
1007
1013
|
const div = doc.querySelector("div");
|
|
1008
|
-
const span = div.childNodes[0];
|
|
1009
1014
|
|
|
1010
1015
|
const textNode = doc.createTextNode(" Text");
|
|
1011
1016
|
|
|
@@ -1053,7 +1058,9 @@ describe("DOM Manipulation - append", () => {
|
|
|
1053
1058
|
|
|
1054
1059
|
describe("append with parent relocation", () => {
|
|
1055
1060
|
it("should move node from another parent when appending", () => {
|
|
1056
|
-
const doc = parseHTML(
|
|
1061
|
+
const doc = parseHTML(
|
|
1062
|
+
"<div id='a'><span>Child</span></div><div id='b'><span>Other</span></div>",
|
|
1063
|
+
);
|
|
1057
1064
|
const divA = doc.querySelector("#a");
|
|
1058
1065
|
const divB = doc.querySelector("#b");
|
|
1059
1066
|
const child = divA.querySelector("span");
|
|
@@ -1067,7 +1074,9 @@ describe("DOM Manipulation - append", () => {
|
|
|
1067
1074
|
});
|
|
1068
1075
|
|
|
1069
1076
|
it("should remove from old parent before appending", () => {
|
|
1070
|
-
const doc = parseHTML(
|
|
1077
|
+
const doc = parseHTML(
|
|
1078
|
+
"<div id='a'><span id='1'>1</span><span id='2'>2</span></div><div id='b'></div>",
|
|
1079
|
+
);
|
|
1071
1080
|
const divA = doc.querySelector("#a");
|
|
1072
1081
|
const divB = doc.querySelector("#b");
|
|
1073
1082
|
const span2 = doc.querySelector("#2");
|
|
@@ -1137,7 +1146,9 @@ describe("DOM Manipulation - append", () => {
|
|
|
1137
1146
|
describe("DOM Manipulation - remove", () => {
|
|
1138
1147
|
describe("Basic remove functionality", () => {
|
|
1139
1148
|
it("should remove an element from its parent", () => {
|
|
1140
|
-
const doc = parseHTML(
|
|
1149
|
+
const doc = parseHTML(
|
|
1150
|
+
"<div><span id='1'>First</span><span id='2'>Second</span></div>",
|
|
1151
|
+
);
|
|
1141
1152
|
const div = doc.querySelector("div");
|
|
1142
1153
|
const span1 = doc.querySelector("#1");
|
|
1143
1154
|
|
|
@@ -1170,7 +1181,9 @@ describe("DOM Manipulation - remove", () => {
|
|
|
1170
1181
|
});
|
|
1171
1182
|
|
|
1172
1183
|
it("should remove first child", () => {
|
|
1173
|
-
const doc = parseHTML(
|
|
1184
|
+
const doc = parseHTML(
|
|
1185
|
+
"<div><span>First</span><span>Second</span><span>Third</span></div>",
|
|
1186
|
+
);
|
|
1174
1187
|
const div = doc.querySelector("div");
|
|
1175
1188
|
const first = div.childNodes[0];
|
|
1176
1189
|
|
|
@@ -1182,7 +1195,9 @@ describe("DOM Manipulation - remove", () => {
|
|
|
1182
1195
|
});
|
|
1183
1196
|
|
|
1184
1197
|
it("should remove last child", () => {
|
|
1185
|
-
const doc = parseHTML(
|
|
1198
|
+
const doc = parseHTML(
|
|
1199
|
+
"<div><span>First</span><span>Second</span><span>Third</span></div>",
|
|
1200
|
+
);
|
|
1186
1201
|
const div = doc.querySelector("div");
|
|
1187
1202
|
const last = div.childNodes[2];
|
|
1188
1203
|
|
|
@@ -1194,7 +1209,9 @@ describe("DOM Manipulation - remove", () => {
|
|
|
1194
1209
|
});
|
|
1195
1210
|
|
|
1196
1211
|
it("should remove middle child", () => {
|
|
1197
|
-
const doc = parseHTML(
|
|
1212
|
+
const doc = parseHTML(
|
|
1213
|
+
"<div><span>First</span><span>Second</span><span>Third</span></div>",
|
|
1214
|
+
);
|
|
1198
1215
|
const div = doc.querySelector("div");
|
|
1199
1216
|
const middle = div.childNodes[1];
|
|
1200
1217
|
|
|
@@ -1208,7 +1225,9 @@ describe("DOM Manipulation - remove", () => {
|
|
|
1208
1225
|
|
|
1209
1226
|
describe("remove sibling relationships", () => {
|
|
1210
1227
|
it("should update nextSibling and previousSibling correctly", () => {
|
|
1211
|
-
const doc = parseHTML(
|
|
1228
|
+
const doc = parseHTML(
|
|
1229
|
+
"<div><span>A</span><span>B</span><span>C</span></div>",
|
|
1230
|
+
);
|
|
1212
1231
|
const div = doc.querySelector("div");
|
|
1213
1232
|
const spanA = div.childNodes[0];
|
|
1214
1233
|
const spanB = div.childNodes[1];
|
|
@@ -1223,7 +1242,9 @@ describe("DOM Manipulation - remove", () => {
|
|
|
1223
1242
|
});
|
|
1224
1243
|
|
|
1225
1244
|
it("should update element sibling relationships", () => {
|
|
1226
|
-
const doc = parseHTML(
|
|
1245
|
+
const doc = parseHTML(
|
|
1246
|
+
"<div><span>A</span><span>B</span><span>C</span></div>",
|
|
1247
|
+
);
|
|
1227
1248
|
const div = doc.querySelector("div");
|
|
1228
1249
|
const spanA = div.childNodes[0];
|
|
1229
1250
|
const spanB = div.childNodes[1];
|
|
@@ -1276,7 +1297,9 @@ describe("DOM Manipulation - remove", () => {
|
|
|
1276
1297
|
|
|
1277
1298
|
describe("remove synchronization", () => {
|
|
1278
1299
|
it("should update innerHTML correctly", () => {
|
|
1279
|
-
const doc = parseHTML(
|
|
1300
|
+
const doc = parseHTML(
|
|
1301
|
+
"<div><span>A</span><span>B</span><span>C</span></div>",
|
|
1302
|
+
);
|
|
1280
1303
|
const div = doc.querySelector("div");
|
|
1281
1304
|
const spanB = div.childNodes[1];
|
|
1282
1305
|
|
|
@@ -1296,7 +1319,9 @@ describe("DOM Manipulation - remove", () => {
|
|
|
1296
1319
|
});
|
|
1297
1320
|
|
|
1298
1321
|
it("should update children array correctly", () => {
|
|
1299
|
-
const doc = parseHTML(
|
|
1322
|
+
const doc = parseHTML(
|
|
1323
|
+
"<div><span>A</span><span>B</span><span>C</span></div>",
|
|
1324
|
+
);
|
|
1300
1325
|
const div = doc.querySelector("div");
|
|
1301
1326
|
const spanB = div.children[1];
|
|
1302
1327
|
|
|
@@ -1310,7 +1335,9 @@ describe("DOM Manipulation - remove", () => {
|
|
|
1310
1335
|
|
|
1311
1336
|
describe("remove multiple elements", () => {
|
|
1312
1337
|
it("should remove multiple elements sequentially", () => {
|
|
1313
|
-
const doc = parseHTML(
|
|
1338
|
+
const doc = parseHTML(
|
|
1339
|
+
"<div><span>A</span><span>B</span><span>C</span></div>",
|
|
1340
|
+
);
|
|
1314
1341
|
const div = doc.querySelector("div");
|
|
1315
1342
|
const spanA = div.childNodes[0];
|
|
1316
1343
|
const spanB = div.childNodes[1];
|
package/tests/dom.test.ts
CHANGED
|
@@ -7,8 +7,8 @@ import {
|
|
|
7
7
|
hasAttribute,
|
|
8
8
|
setAttribute,
|
|
9
9
|
removeAttribute,
|
|
10
|
-
|
|
11
|
-
|
|
10
|
+
createDocument,
|
|
11
|
+
} from "../src/dom-simulator/index.js";
|
|
12
12
|
|
|
13
13
|
function getBodyContent(doc: any): any {
|
|
14
14
|
return doc.body?.firstChild;
|
|
@@ -67,7 +67,9 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
|
|
|
67
67
|
});
|
|
68
68
|
|
|
69
69
|
it("should parse comments", () => {
|
|
70
|
-
const doc = parseHTML(
|
|
70
|
+
const doc = parseHTML(
|
|
71
|
+
"<div><!-- This is a comment --></div><p>Hello</p>",
|
|
72
|
+
);
|
|
71
73
|
|
|
72
74
|
const body = doc.body;
|
|
73
75
|
expect(body.childNodes.length).toBe(2);
|
|
@@ -93,7 +95,7 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
|
|
|
93
95
|
|
|
94
96
|
it("should set sibling relationships correctly", () => {
|
|
95
97
|
const doc = parseHTML(
|
|
96
|
-
"<div><p>First</p><span>Second</span><em>Third</em></div>"
|
|
98
|
+
"<div><p>First</p><span>Second</span><em>Third</em></div>",
|
|
97
99
|
);
|
|
98
100
|
|
|
99
101
|
const div = getBodyContent(doc);
|
|
@@ -181,7 +183,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
|
|
|
181
183
|
|
|
182
184
|
it("should get text from deeply nested elements", () => {
|
|
183
185
|
const doc = parseHTML(
|
|
184
|
-
"<div>Start <p>Middle <em>Deep <strong>Deeper</strong></em></p> End</div>"
|
|
186
|
+
"<div>Start <p>Middle <em>Deep <strong>Deeper</strong></em></p> End</div>",
|
|
185
187
|
);
|
|
186
188
|
const div = getBodyContent(doc);
|
|
187
189
|
|
|
@@ -213,7 +215,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
|
|
|
213
215
|
describe("Attribute functions", () => {
|
|
214
216
|
it("should get existing attributes", () => {
|
|
215
217
|
const doc = parseHTML(
|
|
216
|
-
'<div id="test" class="highlight" data-value="123">Content</div>'
|
|
218
|
+
'<div id="test" class="highlight" data-value="123">Content</div>',
|
|
217
219
|
);
|
|
218
220
|
const div = getBodyContent(doc) as any;
|
|
219
221
|
|
|
@@ -265,7 +267,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
|
|
|
265
267
|
|
|
266
268
|
it("should remove attributes", () => {
|
|
267
269
|
const doc = parseHTML(
|
|
268
|
-
'<div id="test" class="highlight" data-value="123">Content</div>'
|
|
270
|
+
'<div id="test" class="highlight" data-value="123">Content</div>',
|
|
269
271
|
);
|
|
270
272
|
const div = getBodyContent(doc) as any;
|
|
271
273
|
|
|
@@ -353,7 +355,7 @@ describe("DOM extra tests", () => {
|
|
|
353
355
|
});
|
|
354
356
|
|
|
355
357
|
it("should create a new Document", () => {
|
|
356
|
-
const doc =
|
|
358
|
+
const doc = createDocument();
|
|
357
359
|
expect(doc).toBeTruthy();
|
|
358
360
|
expect(doc.nodeType).toBe(NodeType.DOCUMENT_NODE);
|
|
359
361
|
expect(doc.nodeName).toBe("#document");
|
|
@@ -370,7 +372,7 @@ describe("DOM extra tests", () => {
|
|
|
370
372
|
|
|
371
373
|
expect(doc.head?.querySelector("title")?.textContent).toBe("Sample Page");
|
|
372
374
|
expect(doc.head?.querySelector("meta")?.getAttribute("charset")).toBe(
|
|
373
|
-
"UTF-8"
|
|
375
|
+
"UTF-8",
|
|
374
376
|
);
|
|
375
377
|
});
|
|
376
378
|
|
|
@@ -387,7 +389,7 @@ describe("DOM extra tests", () => {
|
|
|
387
389
|
expect(paragraphs.length).toBe(2);
|
|
388
390
|
expect(paragraphs[0]?.textContent).toBe("First paragraph.");
|
|
389
391
|
expect(paragraphs[1]?.textContent).toBe(
|
|
390
|
-
"Second paragraph with formatting."
|
|
392
|
+
"Second paragraph with formatting.",
|
|
391
393
|
);
|
|
392
394
|
|
|
393
395
|
const strong = section.querySelector("strong")!;
|
|
@@ -408,7 +410,7 @@ describe("DOM extra tests", () => {
|
|
|
408
410
|
|
|
409
411
|
const main = doc.body?.querySelector("main")!;
|
|
410
412
|
const commentNode = (main.childNodes as any).find(
|
|
411
|
-
(n: any) => n.nodeType === NodeType.COMMENT_NODE
|
|
413
|
+
(n: any) => n.nodeType === NodeType.COMMENT_NODE,
|
|
412
414
|
);
|
|
413
415
|
expect(commentNode).toBeTruthy();
|
|
414
416
|
expect(commentNode?.nodeValue?.trim()).toBe("Footer note");
|
|
@@ -425,7 +427,7 @@ describe("DOM extra tests", () => {
|
|
|
425
427
|
const header = doc.body?.querySelector("#main-header")!;
|
|
426
428
|
|
|
427
429
|
const clonedFooter = (doc.body?.querySelector("footer") as any).cloneNode(
|
|
428
|
-
true
|
|
430
|
+
true,
|
|
429
431
|
);
|
|
430
432
|
expect(clonedFooter.nodeName).toBe("FOOTER");
|
|
431
433
|
expect(clonedFooter.querySelector("a")?.textContent).toBe("Email us");
|
|
@@ -480,7 +482,7 @@ describe("DOM extra tests", () => {
|
|
|
480
482
|
|
|
481
483
|
expect(container.querySelector("h2")?.textContent).toBe("Dynamic Content");
|
|
482
484
|
expect(container.querySelector("p")?.textContent).toBe(
|
|
483
|
-
"This is a dynamically added paragraph."
|
|
485
|
+
"This is a dynamically added paragraph.",
|
|
484
486
|
);
|
|
485
487
|
expect(container.querySelectorAll("li").length).toBe(2);
|
|
486
488
|
});
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { expect, it } from "bun:test";
|
|
2
|
+
import { detectEncoding } from "../../src/encoding/index.ts";
|
|
3
|
+
|
|
4
|
+
it("should detect charset from meta tag", () => {
|
|
5
|
+
const html = '<html><head><meta charset="utf-8"></head></html>';
|
|
6
|
+
expect(detectEncoding(html)).toBe("utf-8");
|
|
7
|
+
});
|
|
8
|
+
|
|
9
|
+
it("should detect charset from meta tag with single quotes", () => {
|
|
10
|
+
const html = "<html><head><meta charset='iso-8859-1'></head></html>";
|
|
11
|
+
expect(detectEncoding(html)).toBe("windows-1252");
|
|
12
|
+
});
|
|
13
|
+
|
|
14
|
+
it("should detect charset from content-type meta", () => {
|
|
15
|
+
const html =
|
|
16
|
+
'<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head></html>';
|
|
17
|
+
expect(detectEncoding(html)).toBe("utf-8");
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
it("should return windows-1252 as default", () => {
|
|
21
|
+
const html = "<html><head></head></html>";
|
|
22
|
+
expect(detectEncoding(html)).toBe("windows-1252");
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
it("should normalize encoding aliases", () => {
|
|
26
|
+
const html = '<html><head><meta charset="UTF-8"></head></html>';
|
|
27
|
+
expect(detectEncoding(html)).toBe("utf-8");
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
it("should handle case insensitive charset", () => {
|
|
31
|
+
const html = '<html><head><meta CHARSET="utf-8"></head></html>';
|
|
32
|
+
expect(detectEncoding(html)).toBe("utf-8");
|
|
33
|
+
});
|
package/tests/google-dom.test.ts
CHANGED
|
@@ -100,8 +100,8 @@ describe("Google DOM Parsing Test", () => {
|
|
|
100
100
|
for (let i = 0; i < Math.min(navLinks.length, 5); i++) {
|
|
101
101
|
const link = navLinks[i];
|
|
102
102
|
if (link) {
|
|
103
|
-
|
|
104
|
-
|
|
103
|
+
link.getAttribute("href");
|
|
104
|
+
link.textContent?.trim();
|
|
105
105
|
}
|
|
106
106
|
}
|
|
107
107
|
}
|