@tkeron/html-parser 1.1.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/.github/workflows/npm_deploy.yml +14 -4
  2. package/README.md +6 -6
  3. package/bun.lock +6 -8
  4. package/check-versions.ts +147 -0
  5. package/index.ts +4 -8
  6. package/package.json +5 -6
  7. package/src/dom-simulator/append-child.ts +130 -0
  8. package/src/dom-simulator/append.ts +18 -0
  9. package/src/dom-simulator/attributes.ts +23 -0
  10. package/src/dom-simulator/clone-node.ts +51 -0
  11. package/src/dom-simulator/convert-ast-node-to-dom.ts +37 -0
  12. package/src/dom-simulator/create-cdata.ts +18 -0
  13. package/src/dom-simulator/create-comment.ts +23 -0
  14. package/src/dom-simulator/create-doctype.ts +24 -0
  15. package/src/dom-simulator/create-document.ts +81 -0
  16. package/src/dom-simulator/create-element.ts +195 -0
  17. package/src/dom-simulator/create-processing-instruction.ts +19 -0
  18. package/src/dom-simulator/create-temp-parent.ts +9 -0
  19. package/src/dom-simulator/create-text-node.ts +23 -0
  20. package/src/dom-simulator/escape-text-content.ts +6 -0
  21. package/src/dom-simulator/find-special-elements.ts +14 -0
  22. package/src/dom-simulator/get-text-content.ts +18 -0
  23. package/src/dom-simulator/index.ts +36 -0
  24. package/src/dom-simulator/inner-outer-html.ts +182 -0
  25. package/src/dom-simulator/insert-after.ts +20 -0
  26. package/src/dom-simulator/insert-before.ts +108 -0
  27. package/src/dom-simulator/matches.ts +26 -0
  28. package/src/dom-simulator/node-types.ts +26 -0
  29. package/src/dom-simulator/prepend.ts +24 -0
  30. package/src/dom-simulator/remove-child.ts +68 -0
  31. package/src/dom-simulator/remove.ts +7 -0
  32. package/src/dom-simulator/replace-child.ts +152 -0
  33. package/src/dom-simulator/set-text-content.ts +33 -0
  34. package/src/dom-simulator/update-element-content.ts +56 -0
  35. package/src/dom-simulator.ts +12 -1126
  36. package/src/encoding/constants.ts +8 -0
  37. package/src/encoding/detect-encoding.ts +21 -0
  38. package/src/encoding/index.ts +1 -0
  39. package/src/encoding/normalize-encoding.ts +6 -0
  40. package/src/html-entities.ts +2127 -0
  41. package/src/index.ts +5 -5
  42. package/src/parser/adoption-agency-helpers.ts +145 -0
  43. package/src/parser/constants.ts +137 -0
  44. package/src/parser/dom-to-ast.ts +79 -0
  45. package/src/parser/index.ts +9 -0
  46. package/src/parser/parse.ts +772 -0
  47. package/src/parser/types.ts +56 -0
  48. package/src/selectors/find-elements-descendant.ts +47 -0
  49. package/src/selectors/index.ts +2 -0
  50. package/src/selectors/matches-selector.ts +12 -0
  51. package/src/selectors/matches-token.ts +27 -0
  52. package/src/selectors/parse-selector.ts +48 -0
  53. package/src/selectors/query-selector-all.ts +43 -0
  54. package/src/selectors/query-selector.ts +6 -0
  55. package/src/selectors/types.ts +10 -0
  56. package/src/serializer/attributes.ts +74 -0
  57. package/src/serializer/escape.ts +13 -0
  58. package/src/serializer/index.ts +1 -0
  59. package/src/serializer/serialize-tokens.ts +511 -0
  60. package/src/tokenizer/calculate-position.ts +10 -0
  61. package/src/tokenizer/constants.ts +11 -0
  62. package/src/tokenizer/decode-entities.ts +64 -0
  63. package/src/tokenizer/index.ts +2 -0
  64. package/src/tokenizer/parse-attributes.ts +74 -0
  65. package/src/tokenizer/tokenize.ts +165 -0
  66. package/src/tokenizer/types.ts +25 -0
  67. package/tests/adoption-agency-helpers.test.ts +304 -0
  68. package/tests/advanced.test.ts +242 -221
  69. package/tests/cloneNode.test.ts +19 -66
  70. package/tests/custom-elements-head.test.ts +54 -55
  71. package/tests/dom-extended.test.ts +77 -64
  72. package/tests/dom-manipulation.test.ts +51 -24
  73. package/tests/dom.test.ts +15 -13
  74. package/tests/encoding/detect-encoding.test.ts +33 -0
  75. package/tests/google-dom.test.ts +2 -2
  76. package/tests/helpers/tokenizer-adapter.test.ts +29 -43
  77. package/tests/helpers/tokenizer-adapter.ts +36 -33
  78. package/tests/helpers/tree-adapter.test.ts +20 -20
  79. package/tests/helpers/tree-adapter.ts +34 -24
  80. package/tests/html-entities-text.test.ts +6 -2
  81. package/tests/innerhtml-void-elements.test.ts +52 -36
  82. package/tests/outerHTML-replacement.test.ts +37 -65
  83. package/tests/parser/dom-to-ast.test.ts +109 -0
  84. package/tests/parser/parse.test.ts +139 -0
  85. package/tests/parser.test.ts +281 -217
  86. package/tests/selectors/query-selector-all.test.ts +39 -0
  87. package/tests/selectors/query-selector.test.ts +42 -0
  88. package/tests/serializer/attributes.test.ts +132 -0
  89. package/tests/serializer/escape.test.ts +51 -0
  90. package/tests/serializer/serialize-tokens.test.ts +80 -0
  91. package/tests/serializer-core.test.ts +6 -6
  92. package/tests/serializer-injectmeta.test.ts +6 -6
  93. package/tests/serializer-optionaltags.test.ts +9 -6
  94. package/tests/serializer-options.test.ts +6 -6
  95. package/tests/serializer-whitespace.test.ts +6 -6
  96. package/tests/tokenizer/calculate-position.test.ts +34 -0
  97. package/tests/tokenizer/decode-entities.test.ts +31 -0
  98. package/tests/tokenizer/parse-attributes.test.ts +44 -0
  99. package/tests/tokenizer/tokenize.test.ts +757 -0
  100. package/tests/tokenizer-namedEntities.test.ts +10 -7
  101. package/tests/tokenizer-pendingSpecChanges.test.ts +10 -7
  102. package/tests/tokenizer.test.ts +268 -256
  103. package/tests/tree-construction-adoption01.test.ts +25 -16
  104. package/tests/tree-construction-adoption02.test.ts +30 -19
  105. package/tests/tree-construction-domjs-unsafe.test.ts +6 -4
  106. package/tests/tree-construction-entities02.test.ts +18 -16
  107. package/tests/tree-construction-html5test-com.test.ts +16 -10
  108. package/tests/tree-construction-math.test.ts +11 -9
  109. package/tests/tree-construction-namespace-sensitivity.test.ts +11 -9
  110. package/tests/tree-construction-noscript01.test.ts +11 -9
  111. package/tests/tree-construction-ruby.test.ts +6 -4
  112. package/tests/tree-construction-scriptdata01.test.ts +6 -4
  113. package/tests/tree-construction-svg.test.ts +6 -4
  114. package/tests/tree-construction-template.test.ts +6 -4
  115. package/tests/tree-construction-tests10.test.ts +6 -4
  116. package/tests/tree-construction-tests11.test.ts +6 -4
  117. package/tests/tree-construction-tests20.test.ts +7 -4
  118. package/tests/tree-construction-tests21.test.ts +7 -4
  119. package/tests/tree-construction-tests23.test.ts +7 -4
  120. package/tests/tree-construction-tests24.test.ts +7 -4
  121. package/tests/tree-construction-tests5.test.ts +6 -5
  122. package/tests/tree-construction-tests6.test.ts +6 -5
  123. package/tests/tree-construction-tests_innerHTML_1.test.ts +6 -5
  124. package/tests/void-elements.test.ts +85 -40
  125. package/tsconfig.json +1 -1
  126. package/src/css-selector.ts +0 -185
  127. package/src/encoding.ts +0 -39
  128. package/src/parser.ts +0 -682
  129. package/src/serializer.ts +0 -450
  130. package/src/tokenizer.ts +0 -325
  131. package/tests/selectors.test.ts +0 -128
@@ -1,34 +1,36 @@
1
- import { describe, it, expect } from 'bun:test';
2
- import { parseHTML } from '../index';
3
- import {
4
- setInnerHTML
5
- } from '../src/dom-simulator';
6
-
7
- describe('DOM Extended Functionality', () => {
8
- describe('innerHTML and outerHTML', () => {
9
- it('should generate correct innerHTML for simple elements', () => {
10
- const doc = parseHTML('<div>Hello World</div>') as Document;
1
+ import { describe, it, expect } from "bun:test";
2
+ import { parseHTML } from "../index";
3
+ import { setInnerHTML } from "../src/dom-simulator/index.js";
4
+
5
+ describe("DOM Extended Functionality", () => {
6
+ describe("innerHTML and outerHTML", () => {
7
+ it("should generate correct innerHTML for simple elements", () => {
8
+ const doc = parseHTML("<div>Hello World</div>") as Document;
11
9
  const div = doc.body?.firstChild as HTMLElement;
12
10
 
13
- expect(div.innerHTML).toBe('Hello World');
11
+ expect(div.innerHTML).toBe("Hello World");
14
12
  });
15
13
 
16
- it('should generate correct innerHTML for nested elements', () => {
17
- const doc = parseHTML('<div><p>Hello</p><span>World</span></div>') as Document;
14
+ it("should generate correct innerHTML for nested elements", () => {
15
+ const doc = parseHTML(
16
+ "<div><p>Hello</p><span>World</span></div>",
17
+ ) as Document;
18
18
  const div = doc.body?.firstChild as HTMLElement;
19
19
 
20
- expect(div.innerHTML).toBe('<p>Hello</p><span>World</span>');
20
+ expect(div.innerHTML).toBe("<p>Hello</p><span>World</span>");
21
21
  });
22
22
 
23
- it('should generate correct outerHTML for elements', () => {
23
+ it("should generate correct outerHTML for elements", () => {
24
24
  const doc = parseHTML('<div class="test">Hello</div>') as Document;
25
25
  const div = doc.body?.firstChild as HTMLElement;
26
26
 
27
27
  expect(div.outerHTML).toBe('<div class="test">Hello</div>');
28
28
  });
29
29
 
30
- it('should generate correct outerHTML for elements with multiple attributes', () => {
31
- const doc = parseHTML('<input type="text" name="username" value="test">') as Document;
30
+ it("should generate correct outerHTML for elements with multiple attributes", () => {
31
+ const doc = parseHTML(
32
+ '<input type="text" name="username" value="test">',
33
+ ) as Document;
32
34
  const input = doc.body?.firstChild as HTMLElement;
33
35
 
34
36
  expect(input.outerHTML).toContain('type="text"');
@@ -36,40 +38,44 @@ describe('DOM Extended Functionality', () => {
36
38
  expect(input.outerHTML).toContain('value="test"');
37
39
  });
38
40
 
39
- it('should handle comments in innerHTML', () => {
40
- const doc = parseHTML('<div><!-- comment -->text</div>') as Document;
41
+ it("should handle comments in innerHTML", () => {
42
+ const doc = parseHTML("<div><!-- comment -->text</div>") as Document;
41
43
  const div = doc.body?.firstChild as HTMLElement;
42
44
 
43
- expect(div.innerHTML).toBe('<!-- comment -->text');
45
+ expect(div.innerHTML).toBe("<!-- comment -->text");
44
46
  });
45
47
  });
46
48
 
47
- describe('textContent property', () => {
48
- it('should provide textContent on elements', () => {
49
- const doc = parseHTML('<div>Hello <span>World</span></div>') as Document;
49
+ describe("textContent property", () => {
50
+ it("should provide textContent on elements", () => {
51
+ const doc = parseHTML("<div>Hello <span>World</span></div>") as Document;
50
52
  const div = doc.body?.firstChild as HTMLElement;
51
53
 
52
- expect(div.textContent).toBe('Hello World');
54
+ expect(div.textContent).toBe("Hello World");
53
55
  });
54
56
 
55
- it('should provide textContent for deeply nested elements', () => {
56
- const doc = parseHTML('<div><p><em>Hello</em> <strong>Beautiful</strong></p> <span>World</span></div>') as Document;
57
+ it("should provide textContent for deeply nested elements", () => {
58
+ const doc = parseHTML(
59
+ "<div><p><em>Hello</em> <strong>Beautiful</strong></p> <span>World</span></div>",
60
+ ) as Document;
57
61
  const div = doc.body?.firstChild as HTMLElement;
58
62
 
59
- expect(div.textContent).toBe('Hello Beautiful World');
63
+ expect(div.textContent).toBe("Hello Beautiful World");
60
64
  });
61
65
 
62
- it('should ignore comments in textContent', () => {
63
- const doc = parseHTML('<div>Hello <!-- comment --> World</div>') as Document;
66
+ it("should ignore comments in textContent", () => {
67
+ const doc = parseHTML(
68
+ "<div>Hello <!-- comment --> World</div>",
69
+ ) as Document;
64
70
  const div = doc.body?.firstChild as HTMLElement;
65
71
 
66
- expect(div.textContent).toBe('Hello World');
72
+ expect(div.textContent).toBe("Hello World");
67
73
  });
68
74
  });
69
75
 
70
- describe('element navigation properties', () => {
71
- it('should provide parentElement property', () => {
72
- const doc = parseHTML('<div><p>Hello</p></div>') as Document;
76
+ describe("element navigation properties", () => {
77
+ it("should provide parentElement property", () => {
78
+ const doc = parseHTML("<div><p>Hello</p></div>") as Document;
73
79
  const div = doc.body?.firstChild as HTMLElement;
74
80
  const p = div.children[0];
75
81
 
@@ -77,16 +83,20 @@ describe('DOM Extended Functionality', () => {
77
83
  expect(p?.parentElement).toBe(div);
78
84
  });
79
85
 
80
- it('should provide firstElementChild and lastElementChild', () => {
81
- const doc = parseHTML('<div><span>First</span><p>Second</p><em>Last</em></div>') as Document;
86
+ it("should provide firstElementChild and lastElementChild", () => {
87
+ const doc = parseHTML(
88
+ "<div><span>First</span><p>Second</p><em>Last</em></div>",
89
+ ) as Document;
82
90
  const div = doc.body?.firstChild as HTMLElement;
83
91
 
84
- expect(div.firstElementChild?.tagName).toBe('SPAN');
85
- expect(div.lastElementChild?.tagName).toBe('EM');
92
+ expect(div.firstElementChild?.tagName).toBe("SPAN");
93
+ expect(div.lastElementChild?.tagName).toBe("EM");
86
94
  });
87
95
 
88
- it('should provide nextElementSibling and previousElementSibling', () => {
89
- const doc = parseHTML('<div><span>First</span><p>Second</p><em>Last</em></div>') as Document;
96
+ it("should provide nextElementSibling and previousElementSibling", () => {
97
+ const doc = parseHTML(
98
+ "<div><span>First</span><p>Second</p><em>Last</em></div>",
99
+ ) as Document;
90
100
  const div = doc.body?.firstChild as HTMLElement;
91
101
  const span = div.children[0];
92
102
  const p = div.children[1];
@@ -108,51 +118,57 @@ describe('DOM Extended Functionality', () => {
108
118
  });
109
119
  });
110
120
 
111
- describe('setInnerHTML functionality', () => {
112
- it('should clear existing content when setting innerHTML', () => {
113
- const doc = parseHTML('<div><p>Old content</p></div>') as Document;
121
+ describe("setInnerHTML functionality", () => {
122
+ it("should clear existing content when setting innerHTML", () => {
123
+ const doc = parseHTML("<div><p>Old content</p></div>") as Document;
114
124
  const div = doc.body?.firstChild as HTMLElement;
115
125
 
116
- setInnerHTML(div, 'New content');
126
+ setInnerHTML(div, "New content");
117
127
 
118
- expect(div.innerHTML).toBe('New content');
128
+ expect(div.innerHTML).toBe("New content");
119
129
  expect(div.children.length).toBe(0);
120
130
  expect(div.childNodes.length).toBe(1);
121
131
  expect(div.childNodes[0]?.nodeType).toBe(3);
122
- expect(div.childNodes[0]?.textContent).toBe('New content');
132
+ expect(div.childNodes[0]?.textContent).toBe("New content");
123
133
  });
124
134
  });
125
135
 
126
- describe('Document body property type validation', () => {
127
- it('should have body property with HTMLElement type', () => {
128
- const doc = parseHTML('<html><body><p>Content</p></body></html>') as Document;
136
+ describe("Document body property type validation", () => {
137
+ it("should have body property with HTMLElement type", () => {
138
+ const doc = parseHTML(
139
+ "<html><body><p>Content</p></body></html>",
140
+ ) as Document;
129
141
 
130
142
  expect(doc.body).toBeTruthy();
131
- expect(doc.body?.tagName).toBe('BODY');
132
- expect(doc.body?.innerHTML).toBe('<p>Content</p>');
133
- expect(doc.body?.textContent).toBe('Content');
143
+ expect(doc.body?.tagName).toBe("BODY");
144
+ expect(doc.body?.innerHTML).toBe("<p>Content</p>");
145
+ expect(doc.body?.textContent).toBe("Content");
134
146
  });
135
147
 
136
- it('should have head property with HTMLElement type', () => {
137
- const doc = parseHTML('<html><head><title>Test</title></head><body></body></html>') as Document;
148
+ it("should have head property with HTMLElement type", () => {
149
+ const doc = parseHTML(
150
+ "<html><head><title>Test</title></head><body></body></html>",
151
+ ) as Document;
138
152
 
139
153
  expect(doc.head).toBeTruthy();
140
- expect(doc.head?.tagName).toBe('HEAD');
141
- expect(doc.head?.innerHTML).toBe('<title>Test</title>');
154
+ expect(doc.head?.tagName).toBe("HEAD");
155
+ expect(doc.head?.innerHTML).toBe("<title>Test</title>");
142
156
  });
143
157
 
144
- it('should have documentElement property with HTMLElement type', () => {
145
- const doc = parseHTML('<html><head></head><body></body></html>') as Document;
158
+ it("should have documentElement property with HTMLElement type", () => {
159
+ const doc = parseHTML(
160
+ "<html><head></head><body></body></html>",
161
+ ) as Document;
146
162
 
147
163
  expect(doc.documentElement).toBeTruthy();
148
- expect(doc.documentElement?.tagName).toBe('HTML');
164
+ expect(doc.documentElement?.tagName).toBe("HTML");
149
165
  expect(doc.documentElement?.children.length).toBe(2);
150
166
  });
151
167
  });
152
168
 
153
- describe('DOM mutation and manipulation', () => {
169
+ describe("DOM mutation and manipulation", () => {
154
170
  it("should append an element and update innerHTML accordingly", () => {
155
- const doc = parseHTML('<html><head></head><body></body></html>');
171
+ const doc = parseHTML("<html><head></head><body></body></html>");
156
172
 
157
173
  const body = doc.querySelector("body");
158
174
 
@@ -164,10 +180,7 @@ describe('DOM Extended Functionality', () => {
164
180
 
165
181
  const innerHTML = body?.innerHTML;
166
182
 
167
- expect(innerHTML).toBe('<h1>Hello World</h1>');
168
-
183
+ expect(innerHTML).toBe("<h1>Hello World</h1>");
169
184
  });
170
185
  });
171
-
172
-
173
186
  });
@@ -1,6 +1,6 @@
1
1
  import { describe, it, expect } from "bun:test";
2
2
  import { parseHTML } from "../index";
3
- import { NodeType } from "../src/dom-simulator";
3
+ import { NodeType } from "../src/dom-simulator/index.js";
4
4
 
5
5
  describe("DOM Manipulation - insertBefore", () => {
6
6
  describe("Basic insertBefore functionality", () => {
@@ -255,7 +255,7 @@ describe("DOM Manipulation - insertBefore", () => {
255
255
  describe("insertBefore with node relocation", () => {
256
256
  it("should remove node from previous parent when inserting", () => {
257
257
  const doc = parseHTML(
258
- "<div id='parent1'><span>Child</span></div><div id='parent2'></div>"
258
+ "<div id='parent1'><span>Child</span></div><div id='parent2'></div>",
259
259
  );
260
260
  const parent1 = doc.querySelector("#parent1");
261
261
  const parent2 = doc.querySelector("#parent2");
@@ -270,7 +270,7 @@ describe("DOM Manipulation - insertBefore", () => {
270
270
 
271
271
  it("should update all relationships when moving node between parents", () => {
272
272
  const doc = parseHTML(
273
- "<div id='p1'><span>A</span><span>B</span></div><div id='p2'><span>C</span></div>"
273
+ "<div id='p1'><span>A</span><span>B</span></div><div id='p2'><span>C</span></div>",
274
274
  );
275
275
  const parent1 = doc.querySelector("#p1");
276
276
  const parent2 = doc.querySelector("#p2");
@@ -319,7 +319,7 @@ describe("DOM Manipulation - replaceChild", () => {
319
319
 
320
320
  it("should replace middle child in multiple children", () => {
321
321
  const doc = parseHTML(
322
- "<div><span>First</span><span>Old</span><span>Third</span></div>"
322
+ "<div><span>First</span><span>Old</span><span>Third</span></div>",
323
323
  );
324
324
  const div = doc.querySelector("div");
325
325
  const oldSpan = div.childNodes[1];
@@ -383,7 +383,7 @@ describe("DOM Manipulation - replaceChild", () => {
383
383
  describe("replaceChild sibling relationships", () => {
384
384
  it("should transfer sibling relationships to new node", () => {
385
385
  const doc = parseHTML(
386
- "<div><span>A</span><span>Old</span><span>C</span></div>"
386
+ "<div><span>A</span><span>Old</span><span>C</span></div>",
387
387
  );
388
388
  const div = doc.querySelector("div");
389
389
  const spanA = div.childNodes[0];
@@ -430,7 +430,9 @@ describe("DOM Manipulation - replaceChild", () => {
430
430
  });
431
431
 
432
432
  it("should clear old child's relationships", () => {
433
- const doc = parseHTML("<div><span>A</span><span>Old</span><span>C</span></div>");
433
+ const doc = parseHTML(
434
+ "<div><span>A</span><span>Old</span><span>C</span></div>",
435
+ );
434
436
  const div = doc.querySelector("div");
435
437
  const oldSpan = div.childNodes[1];
436
438
 
@@ -447,7 +449,7 @@ describe("DOM Manipulation - replaceChild", () => {
447
449
  describe("replaceChild element-specific relationships", () => {
448
450
  it("should update children array when replacing element with element", () => {
449
451
  const doc = parseHTML(
450
- "<div><span>A</span><span>Old</span><span>C</span></div>"
452
+ "<div><span>A</span><span>Old</span><span>C</span></div>",
451
453
  );
452
454
  const div = doc.querySelector("div");
453
455
  const oldSpan = div.children[1];
@@ -464,7 +466,7 @@ describe("DOM Manipulation - replaceChild", () => {
464
466
 
465
467
  it("should update element sibling relationships", () => {
466
468
  const doc = parseHTML(
467
- "<div><span>A</span><span>Old</span><span>C</span></div>"
469
+ "<div><span>A</span><span>Old</span><span>C</span></div>",
468
470
  );
469
471
  const div = doc.querySelector("div");
470
472
  const spanA = div.children[0];
@@ -504,7 +506,7 @@ describe("DOM Manipulation - replaceChild", () => {
504
506
 
505
507
  it("should remove from children array when replacing element with text", () => {
506
508
  const doc = parseHTML(
507
- "<div><span>A</span><span>Old</span><span>C</span></div>"
509
+ "<div><span>A</span><span>Old</span><span>C</span></div>",
508
510
  );
509
511
  const div = doc.querySelector("div");
510
512
  const spanA = div.children[0];
@@ -582,7 +584,7 @@ describe("DOM Manipulation - replaceChild", () => {
582
584
  describe("replaceChild with node relocation", () => {
583
585
  it("should remove node from previous parent before replacing", () => {
584
586
  const doc = parseHTML(
585
- "<div id='p1'><span>Moving</span></div><div id='p2'><span>Old</span></div>"
587
+ "<div id='p1'><span>Moving</span></div><div id='p2'><span>Old</span></div>",
586
588
  );
587
589
  const parent1 = doc.querySelector("#p1");
588
590
  const parent2 = doc.querySelector("#p2");
@@ -830,7 +832,9 @@ describe("DOM Manipulation - prepend", () => {
830
832
 
831
833
  describe("prepend with parent relocation", () => {
832
834
  it("should move node from another parent when prepending", () => {
833
- const doc = parseHTML("<div id='a'><span>Child</span></div><div id='b'><span>Other</span></div>");
835
+ const doc = parseHTML(
836
+ "<div id='a'><span>Child</span></div><div id='b'><span>Other</span></div>",
837
+ );
834
838
  const divA = doc.querySelector("#a");
835
839
  const divB = doc.querySelector("#b");
836
840
  const child = divA.querySelector("span");
@@ -844,7 +848,9 @@ describe("DOM Manipulation - prepend", () => {
844
848
  });
845
849
 
846
850
  it("should remove from old parent before prepending", () => {
847
- const doc = parseHTML("<div id='a'><span id='1'>1</span><span id='2'>2</span></div><div id='b'></div>");
851
+ const doc = parseHTML(
852
+ "<div id='a'><span id='1'>1</span><span id='2'>2</span></div><div id='b'></div>",
853
+ );
848
854
  const divA = doc.querySelector("#a");
849
855
  const divB = doc.querySelector("#b");
850
856
  const span1 = doc.querySelector("#1");
@@ -1005,7 +1011,6 @@ describe("DOM Manipulation - append", () => {
1005
1011
  it("should append text node after elements", () => {
1006
1012
  const doc = parseHTML("<div><span>Element</span></div>");
1007
1013
  const div = doc.querySelector("div");
1008
- const span = div.childNodes[0];
1009
1014
 
1010
1015
  const textNode = doc.createTextNode(" Text");
1011
1016
 
@@ -1053,7 +1058,9 @@ describe("DOM Manipulation - append", () => {
1053
1058
 
1054
1059
  describe("append with parent relocation", () => {
1055
1060
  it("should move node from another parent when appending", () => {
1056
- const doc = parseHTML("<div id='a'><span>Child</span></div><div id='b'><span>Other</span></div>");
1061
+ const doc = parseHTML(
1062
+ "<div id='a'><span>Child</span></div><div id='b'><span>Other</span></div>",
1063
+ );
1057
1064
  const divA = doc.querySelector("#a");
1058
1065
  const divB = doc.querySelector("#b");
1059
1066
  const child = divA.querySelector("span");
@@ -1067,7 +1074,9 @@ describe("DOM Manipulation - append", () => {
1067
1074
  });
1068
1075
 
1069
1076
  it("should remove from old parent before appending", () => {
1070
- const doc = parseHTML("<div id='a'><span id='1'>1</span><span id='2'>2</span></div><div id='b'></div>");
1077
+ const doc = parseHTML(
1078
+ "<div id='a'><span id='1'>1</span><span id='2'>2</span></div><div id='b'></div>",
1079
+ );
1071
1080
  const divA = doc.querySelector("#a");
1072
1081
  const divB = doc.querySelector("#b");
1073
1082
  const span2 = doc.querySelector("#2");
@@ -1137,7 +1146,9 @@ describe("DOM Manipulation - append", () => {
1137
1146
  describe("DOM Manipulation - remove", () => {
1138
1147
  describe("Basic remove functionality", () => {
1139
1148
  it("should remove an element from its parent", () => {
1140
- const doc = parseHTML("<div><span id='1'>First</span><span id='2'>Second</span></div>");
1149
+ const doc = parseHTML(
1150
+ "<div><span id='1'>First</span><span id='2'>Second</span></div>",
1151
+ );
1141
1152
  const div = doc.querySelector("div");
1142
1153
  const span1 = doc.querySelector("#1");
1143
1154
 
@@ -1170,7 +1181,9 @@ describe("DOM Manipulation - remove", () => {
1170
1181
  });
1171
1182
 
1172
1183
  it("should remove first child", () => {
1173
- const doc = parseHTML("<div><span>First</span><span>Second</span><span>Third</span></div>");
1184
+ const doc = parseHTML(
1185
+ "<div><span>First</span><span>Second</span><span>Third</span></div>",
1186
+ );
1174
1187
  const div = doc.querySelector("div");
1175
1188
  const first = div.childNodes[0];
1176
1189
 
@@ -1182,7 +1195,9 @@ describe("DOM Manipulation - remove", () => {
1182
1195
  });
1183
1196
 
1184
1197
  it("should remove last child", () => {
1185
- const doc = parseHTML("<div><span>First</span><span>Second</span><span>Third</span></div>");
1198
+ const doc = parseHTML(
1199
+ "<div><span>First</span><span>Second</span><span>Third</span></div>",
1200
+ );
1186
1201
  const div = doc.querySelector("div");
1187
1202
  const last = div.childNodes[2];
1188
1203
 
@@ -1194,7 +1209,9 @@ describe("DOM Manipulation - remove", () => {
1194
1209
  });
1195
1210
 
1196
1211
  it("should remove middle child", () => {
1197
- const doc = parseHTML("<div><span>First</span><span>Second</span><span>Third</span></div>");
1212
+ const doc = parseHTML(
1213
+ "<div><span>First</span><span>Second</span><span>Third</span></div>",
1214
+ );
1198
1215
  const div = doc.querySelector("div");
1199
1216
  const middle = div.childNodes[1];
1200
1217
 
@@ -1208,7 +1225,9 @@ describe("DOM Manipulation - remove", () => {
1208
1225
 
1209
1226
  describe("remove sibling relationships", () => {
1210
1227
  it("should update nextSibling and previousSibling correctly", () => {
1211
- const doc = parseHTML("<div><span>A</span><span>B</span><span>C</span></div>");
1228
+ const doc = parseHTML(
1229
+ "<div><span>A</span><span>B</span><span>C</span></div>",
1230
+ );
1212
1231
  const div = doc.querySelector("div");
1213
1232
  const spanA = div.childNodes[0];
1214
1233
  const spanB = div.childNodes[1];
@@ -1223,7 +1242,9 @@ describe("DOM Manipulation - remove", () => {
1223
1242
  });
1224
1243
 
1225
1244
  it("should update element sibling relationships", () => {
1226
- const doc = parseHTML("<div><span>A</span><span>B</span><span>C</span></div>");
1245
+ const doc = parseHTML(
1246
+ "<div><span>A</span><span>B</span><span>C</span></div>",
1247
+ );
1227
1248
  const div = doc.querySelector("div");
1228
1249
  const spanA = div.childNodes[0];
1229
1250
  const spanB = div.childNodes[1];
@@ -1276,7 +1297,9 @@ describe("DOM Manipulation - remove", () => {
1276
1297
 
1277
1298
  describe("remove synchronization", () => {
1278
1299
  it("should update innerHTML correctly", () => {
1279
- const doc = parseHTML("<div><span>A</span><span>B</span><span>C</span></div>");
1300
+ const doc = parseHTML(
1301
+ "<div><span>A</span><span>B</span><span>C</span></div>",
1302
+ );
1280
1303
  const div = doc.querySelector("div");
1281
1304
  const spanB = div.childNodes[1];
1282
1305
 
@@ -1296,7 +1319,9 @@ describe("DOM Manipulation - remove", () => {
1296
1319
  });
1297
1320
 
1298
1321
  it("should update children array correctly", () => {
1299
- const doc = parseHTML("<div><span>A</span><span>B</span><span>C</span></div>");
1322
+ const doc = parseHTML(
1323
+ "<div><span>A</span><span>B</span><span>C</span></div>",
1324
+ );
1300
1325
  const div = doc.querySelector("div");
1301
1326
  const spanB = div.children[1];
1302
1327
 
@@ -1310,7 +1335,9 @@ describe("DOM Manipulation - remove", () => {
1310
1335
 
1311
1336
  describe("remove multiple elements", () => {
1312
1337
  it("should remove multiple elements sequentially", () => {
1313
- const doc = parseHTML("<div><span>A</span><span>B</span><span>C</span></div>");
1338
+ const doc = parseHTML(
1339
+ "<div><span>A</span><span>B</span><span>C</span></div>",
1340
+ );
1314
1341
  const div = doc.querySelector("div");
1315
1342
  const spanA = div.childNodes[0];
1316
1343
  const spanB = div.childNodes[1];
package/tests/dom.test.ts CHANGED
@@ -7,8 +7,8 @@ import {
7
7
  hasAttribute,
8
8
  setAttribute,
9
9
  removeAttribute,
10
- } from "../src/dom-simulator";
11
- import { parse } from "../src/parser";
10
+ createDocument,
11
+ } from "../src/dom-simulator/index.js";
12
12
 
13
13
  function getBodyContent(doc: any): any {
14
14
  return doc.body?.firstChild;
@@ -67,7 +67,9 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
67
67
  });
68
68
 
69
69
  it("should parse comments", () => {
70
- const doc = parseHTML("<div><!-- This is a comment --></div><p>Hello</p>");
70
+ const doc = parseHTML(
71
+ "<div><!-- This is a comment --></div><p>Hello</p>",
72
+ );
71
73
 
72
74
  const body = doc.body;
73
75
  expect(body.childNodes.length).toBe(2);
@@ -93,7 +95,7 @@ describe("DOM Simulator - Phase 1: Structure and Conversion", () => {
93
95
 
94
96
  it("should set sibling relationships correctly", () => {
95
97
  const doc = parseHTML(
96
- "<div><p>First</p><span>Second</span><em>Third</em></div>"
98
+ "<div><p>First</p><span>Second</span><em>Third</em></div>",
97
99
  );
98
100
 
99
101
  const div = getBodyContent(doc);
@@ -181,7 +183,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
181
183
 
182
184
  it("should get text from deeply nested elements", () => {
183
185
  const doc = parseHTML(
184
- "<div>Start <p>Middle <em>Deep <strong>Deeper</strong></em></p> End</div>"
186
+ "<div>Start <p>Middle <em>Deep <strong>Deeper</strong></em></p> End</div>",
185
187
  );
186
188
  const div = getBodyContent(doc);
187
189
 
@@ -213,7 +215,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
213
215
  describe("Attribute functions", () => {
214
216
  it("should get existing attributes", () => {
215
217
  const doc = parseHTML(
216
- '<div id="test" class="highlight" data-value="123">Content</div>'
218
+ '<div id="test" class="highlight" data-value="123">Content</div>',
217
219
  );
218
220
  const div = getBodyContent(doc) as any;
219
221
 
@@ -265,7 +267,7 @@ describe("DOM Simulator - Phase 2: Navigation and Attributes", () => {
265
267
 
266
268
  it("should remove attributes", () => {
267
269
  const doc = parseHTML(
268
- '<div id="test" class="highlight" data-value="123">Content</div>'
270
+ '<div id="test" class="highlight" data-value="123">Content</div>',
269
271
  );
270
272
  const div = getBodyContent(doc) as any;
271
273
 
@@ -353,7 +355,7 @@ describe("DOM extra tests", () => {
353
355
  });
354
356
 
355
357
  it("should create a new Document", () => {
356
- const doc = parseHTML();
358
+ const doc = createDocument();
357
359
  expect(doc).toBeTruthy();
358
360
  expect(doc.nodeType).toBe(NodeType.DOCUMENT_NODE);
359
361
  expect(doc.nodeName).toBe("#document");
@@ -370,7 +372,7 @@ describe("DOM extra tests", () => {
370
372
 
371
373
  expect(doc.head?.querySelector("title")?.textContent).toBe("Sample Page");
372
374
  expect(doc.head?.querySelector("meta")?.getAttribute("charset")).toBe(
373
- "UTF-8"
375
+ "UTF-8",
374
376
  );
375
377
  });
376
378
 
@@ -387,7 +389,7 @@ describe("DOM extra tests", () => {
387
389
  expect(paragraphs.length).toBe(2);
388
390
  expect(paragraphs[0]?.textContent).toBe("First paragraph.");
389
391
  expect(paragraphs[1]?.textContent).toBe(
390
- "Second paragraph with formatting."
392
+ "Second paragraph with formatting.",
391
393
  );
392
394
 
393
395
  const strong = section.querySelector("strong")!;
@@ -408,7 +410,7 @@ describe("DOM extra tests", () => {
408
410
 
409
411
  const main = doc.body?.querySelector("main")!;
410
412
  const commentNode = (main.childNodes as any).find(
411
- (n: any) => n.nodeType === NodeType.COMMENT_NODE
413
+ (n: any) => n.nodeType === NodeType.COMMENT_NODE,
412
414
  );
413
415
  expect(commentNode).toBeTruthy();
414
416
  expect(commentNode?.nodeValue?.trim()).toBe("Footer note");
@@ -425,7 +427,7 @@ describe("DOM extra tests", () => {
425
427
  const header = doc.body?.querySelector("#main-header")!;
426
428
 
427
429
  const clonedFooter = (doc.body?.querySelector("footer") as any).cloneNode(
428
- true
430
+ true,
429
431
  );
430
432
  expect(clonedFooter.nodeName).toBe("FOOTER");
431
433
  expect(clonedFooter.querySelector("a")?.textContent).toBe("Email us");
@@ -480,7 +482,7 @@ describe("DOM extra tests", () => {
480
482
 
481
483
  expect(container.querySelector("h2")?.textContent).toBe("Dynamic Content");
482
484
  expect(container.querySelector("p")?.textContent).toBe(
483
- "This is a dynamically added paragraph."
485
+ "This is a dynamically added paragraph.",
484
486
  );
485
487
  expect(container.querySelectorAll("li").length).toBe(2);
486
488
  });
@@ -0,0 +1,33 @@
1
+ import { expect, it } from "bun:test";
2
+ import { detectEncoding } from "../../src/encoding/index.ts";
3
+
4
+ it("should detect charset from meta tag", () => {
5
+ const html = '<html><head><meta charset="utf-8"></head></html>';
6
+ expect(detectEncoding(html)).toBe("utf-8");
7
+ });
8
+
9
+ it("should detect charset from meta tag with single quotes", () => {
10
+ const html = "<html><head><meta charset='iso-8859-1'></head></html>";
11
+ expect(detectEncoding(html)).toBe("windows-1252");
12
+ });
13
+
14
+ it("should detect charset from content-type meta", () => {
15
+ const html =
16
+ '<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head></html>';
17
+ expect(detectEncoding(html)).toBe("utf-8");
18
+ });
19
+
20
+ it("should return windows-1252 as default", () => {
21
+ const html = "<html><head></head></html>";
22
+ expect(detectEncoding(html)).toBe("windows-1252");
23
+ });
24
+
25
+ it("should normalize encoding aliases", () => {
26
+ const html = '<html><head><meta charset="UTF-8"></head></html>';
27
+ expect(detectEncoding(html)).toBe("utf-8");
28
+ });
29
+
30
+ it("should handle case insensitive charset", () => {
31
+ const html = '<html><head><meta CHARSET="utf-8"></head></html>';
32
+ expect(detectEncoding(html)).toBe("utf-8");
33
+ });
@@ -100,8 +100,8 @@ describe("Google DOM Parsing Test", () => {
100
100
  for (let i = 0; i < Math.min(navLinks.length, 5); i++) {
101
101
  const link = navLinks[i];
102
102
  if (link) {
103
- const href = link.getAttribute("href");
104
- const text = link.textContent?.trim();
103
+ link.getAttribute("href");
104
+ link.textContent?.trim();
105
105
  }
106
106
  }
107
107
  }