@tkeron/html-parser 1.1.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. package/.github/workflows/npm_deploy.yml +14 -4
  2. package/README.md +6 -6
  3. package/bun.lock +6 -8
  4. package/check-versions.ts +147 -0
  5. package/index.ts +4 -8
  6. package/package.json +5 -6
  7. package/src/dom-simulator/append-child.ts +130 -0
  8. package/src/dom-simulator/append.ts +18 -0
  9. package/src/dom-simulator/attributes.ts +23 -0
  10. package/src/dom-simulator/clone-node.ts +51 -0
  11. package/src/dom-simulator/convert-ast-node-to-dom.ts +37 -0
  12. package/src/dom-simulator/create-cdata.ts +18 -0
  13. package/src/dom-simulator/create-comment.ts +23 -0
  14. package/src/dom-simulator/create-doctype.ts +24 -0
  15. package/src/dom-simulator/create-document.ts +81 -0
  16. package/src/dom-simulator/create-element.ts +195 -0
  17. package/src/dom-simulator/create-processing-instruction.ts +19 -0
  18. package/src/dom-simulator/create-temp-parent.ts +9 -0
  19. package/src/dom-simulator/create-text-node.ts +23 -0
  20. package/src/dom-simulator/escape-text-content.ts +6 -0
  21. package/src/dom-simulator/find-special-elements.ts +14 -0
  22. package/src/dom-simulator/get-text-content.ts +18 -0
  23. package/src/dom-simulator/index.ts +36 -0
  24. package/src/dom-simulator/inner-outer-html.ts +182 -0
  25. package/src/dom-simulator/insert-after.ts +20 -0
  26. package/src/dom-simulator/insert-before.ts +108 -0
  27. package/src/dom-simulator/matches.ts +26 -0
  28. package/src/dom-simulator/node-types.ts +26 -0
  29. package/src/dom-simulator/prepend.ts +24 -0
  30. package/src/dom-simulator/remove-child.ts +68 -0
  31. package/src/dom-simulator/remove.ts +7 -0
  32. package/src/dom-simulator/replace-child.ts +152 -0
  33. package/src/dom-simulator/set-text-content.ts +33 -0
  34. package/src/dom-simulator/update-element-content.ts +56 -0
  35. package/src/dom-simulator.ts +12 -1126
  36. package/src/encoding/constants.ts +8 -0
  37. package/src/encoding/detect-encoding.ts +21 -0
  38. package/src/encoding/index.ts +1 -0
  39. package/src/encoding/normalize-encoding.ts +6 -0
  40. package/src/html-entities.ts +2127 -0
  41. package/src/index.ts +5 -5
  42. package/src/parser/adoption-agency-helpers.ts +145 -0
  43. package/src/parser/constants.ts +137 -0
  44. package/src/parser/dom-to-ast.ts +79 -0
  45. package/src/parser/index.ts +9 -0
  46. package/src/parser/parse.ts +772 -0
  47. package/src/parser/types.ts +56 -0
  48. package/src/selectors/find-elements-descendant.ts +47 -0
  49. package/src/selectors/index.ts +2 -0
  50. package/src/selectors/matches-selector.ts +12 -0
  51. package/src/selectors/matches-token.ts +27 -0
  52. package/src/selectors/parse-selector.ts +48 -0
  53. package/src/selectors/query-selector-all.ts +43 -0
  54. package/src/selectors/query-selector.ts +6 -0
  55. package/src/selectors/types.ts +10 -0
  56. package/src/serializer/attributes.ts +74 -0
  57. package/src/serializer/escape.ts +13 -0
  58. package/src/serializer/index.ts +1 -0
  59. package/src/serializer/serialize-tokens.ts +511 -0
  60. package/src/tokenizer/calculate-position.ts +10 -0
  61. package/src/tokenizer/constants.ts +11 -0
  62. package/src/tokenizer/decode-entities.ts +64 -0
  63. package/src/tokenizer/index.ts +2 -0
  64. package/src/tokenizer/parse-attributes.ts +74 -0
  65. package/src/tokenizer/tokenize.ts +165 -0
  66. package/src/tokenizer/types.ts +25 -0
  67. package/tests/adoption-agency-helpers.test.ts +304 -0
  68. package/tests/advanced.test.ts +242 -221
  69. package/tests/cloneNode.test.ts +19 -66
  70. package/tests/custom-elements-head.test.ts +54 -55
  71. package/tests/dom-extended.test.ts +77 -64
  72. package/tests/dom-manipulation.test.ts +51 -24
  73. package/tests/dom.test.ts +15 -13
  74. package/tests/encoding/detect-encoding.test.ts +33 -0
  75. package/tests/google-dom.test.ts +2 -2
  76. package/tests/helpers/tokenizer-adapter.test.ts +29 -43
  77. package/tests/helpers/tokenizer-adapter.ts +36 -33
  78. package/tests/helpers/tree-adapter.test.ts +20 -20
  79. package/tests/helpers/tree-adapter.ts +34 -24
  80. package/tests/html-entities-text.test.ts +6 -2
  81. package/tests/innerhtml-void-elements.test.ts +52 -36
  82. package/tests/outerHTML-replacement.test.ts +37 -65
  83. package/tests/parser/dom-to-ast.test.ts +109 -0
  84. package/tests/parser/parse.test.ts +139 -0
  85. package/tests/parser.test.ts +281 -217
  86. package/tests/selectors/query-selector-all.test.ts +39 -0
  87. package/tests/selectors/query-selector.test.ts +42 -0
  88. package/tests/serializer/attributes.test.ts +132 -0
  89. package/tests/serializer/escape.test.ts +51 -0
  90. package/tests/serializer/serialize-tokens.test.ts +80 -0
  91. package/tests/serializer-core.test.ts +6 -6
  92. package/tests/serializer-injectmeta.test.ts +6 -6
  93. package/tests/serializer-optionaltags.test.ts +9 -6
  94. package/tests/serializer-options.test.ts +6 -6
  95. package/tests/serializer-whitespace.test.ts +6 -6
  96. package/tests/tokenizer/calculate-position.test.ts +34 -0
  97. package/tests/tokenizer/decode-entities.test.ts +31 -0
  98. package/tests/tokenizer/parse-attributes.test.ts +44 -0
  99. package/tests/tokenizer/tokenize.test.ts +757 -0
  100. package/tests/tokenizer-namedEntities.test.ts +10 -7
  101. package/tests/tokenizer-pendingSpecChanges.test.ts +10 -7
  102. package/tests/tokenizer.test.ts +268 -256
  103. package/tests/tree-construction-adoption01.test.ts +25 -16
  104. package/tests/tree-construction-adoption02.test.ts +30 -19
  105. package/tests/tree-construction-domjs-unsafe.test.ts +6 -4
  106. package/tests/tree-construction-entities02.test.ts +18 -16
  107. package/tests/tree-construction-html5test-com.test.ts +16 -10
  108. package/tests/tree-construction-math.test.ts +11 -9
  109. package/tests/tree-construction-namespace-sensitivity.test.ts +11 -9
  110. package/tests/tree-construction-noscript01.test.ts +11 -9
  111. package/tests/tree-construction-ruby.test.ts +6 -4
  112. package/tests/tree-construction-scriptdata01.test.ts +6 -4
  113. package/tests/tree-construction-svg.test.ts +6 -4
  114. package/tests/tree-construction-template.test.ts +6 -4
  115. package/tests/tree-construction-tests10.test.ts +6 -4
  116. package/tests/tree-construction-tests11.test.ts +6 -4
  117. package/tests/tree-construction-tests20.test.ts +7 -4
  118. package/tests/tree-construction-tests21.test.ts +7 -4
  119. package/tests/tree-construction-tests23.test.ts +7 -4
  120. package/tests/tree-construction-tests24.test.ts +7 -4
  121. package/tests/tree-construction-tests5.test.ts +6 -5
  122. package/tests/tree-construction-tests6.test.ts +6 -5
  123. package/tests/tree-construction-tests_innerHTML_1.test.ts +6 -5
  124. package/tests/void-elements.test.ts +85 -40
  125. package/tsconfig.json +1 -1
  126. package/src/css-selector.ts +0 -185
  127. package/src/encoding.ts +0 -39
  128. package/src/parser.ts +0 -682
  129. package/src/serializer.ts +0 -450
  130. package/src/tokenizer.ts +0 -325
  131. package/tests/selectors.test.ts +0 -128
@@ -1,191 +1,193 @@
1
1
  // @ts-nocheck
2
- import { expect, it, describe } from 'bun:test';
3
- import { tokenize } from '../src/tokenizer';
4
- import { parse, domToAST, ASTNodeType, type ASTNode } from '../src/parser';
5
- import { file } from 'bun';
2
+ import { expect, it, describe } from "bun:test";
3
+ import { tokenize } from "../src/tokenizer/index.js";
4
+ import { parse, domToAST, ASTNodeType, type ASTNode } from "../src/parser";
5
+ import { file } from "bun";
6
6
 
7
7
  function parseToAST(html: string): ASTNode {
8
8
  const tokens = tokenize(html);
9
9
  const dom = parse(tokens);
10
10
  const ast = domToAST(dom);
11
-
12
- const hasExplicitHtml = html.includes('<html') || html.includes('<!DOCTYPE') || html.includes('<!doctype');
11
+
12
+ const hasExplicitHtml =
13
+ html.includes("<html") ||
14
+ html.includes("<!DOCTYPE") ||
15
+ html.includes("<!doctype");
13
16
  if (hasExplicitHtml) {
14
17
  return ast;
15
18
  }
16
-
17
- const htmlEl = ast.children?.find(c => c.tagName === 'html');
19
+
20
+ const htmlEl = ast.children?.find((c) => c.tagName === "html");
18
21
  if (htmlEl) {
19
- const bodyEl = htmlEl.children?.find(c => c.tagName === 'body');
22
+ const bodyEl = htmlEl.children?.find((c) => c.tagName === "body");
20
23
  if (bodyEl && bodyEl.children) {
21
- const nonHtmlChildren = ast.children?.filter(c => c.tagName !== 'html' && c.type !== 'doctype') || [];
22
- return { type: ASTNodeType.Document, children: [...nonHtmlChildren, ...bodyEl.children] };
24
+ const nonHtmlChildren =
25
+ ast.children?.filter(
26
+ (c) => c.tagName !== "html" && c.type !== "doctype",
27
+ ) || [];
28
+ return {
29
+ type: ASTNodeType.Document,
30
+ children: [...nonHtmlChildren, ...bodyEl.children],
31
+ };
23
32
  }
24
33
  }
25
34
  return ast;
26
35
  }
27
36
 
28
- describe('HTML Parser', () => {
29
-
30
- describe('Basic Elements', () => {
31
- it('should parse simple element', () => {
32
- const ast = parseToAST('<div></div>');
37
+ describe("HTML Parser", () => {
38
+ describe("Basic Elements", () => {
39
+ it("should parse simple element", () => {
40
+ const ast = parseToAST("<div></div>");
33
41
 
34
42
  expect(ast.type).toBe(ASTNodeType.Document);
35
43
  expect(ast.children).toHaveLength(1);
36
44
 
37
45
  const divElement = ast.children![0]!;
38
46
  expect(divElement.type).toBe(ASTNodeType.Element);
39
- expect(divElement.tagName).toBe('div');
47
+ expect(divElement.tagName).toBe("div");
40
48
  expect(divElement.children).toHaveLength(0);
41
49
  });
42
50
 
43
- it('should parse element with attributes', () => {
51
+ it("should parse element with attributes", () => {
44
52
  const ast = parseToAST('<div class="container" id="main"></div>');
45
53
 
46
54
  const divElement = ast.children![0]!;
47
55
  expect(divElement.attributes).toEqual({
48
- class: 'container',
49
- id: 'main'
56
+ class: "container",
57
+ id: "main",
50
58
  });
51
59
  });
52
60
 
53
- it('should parse self-closing elements', () => {
61
+ it("should parse self-closing elements", () => {
54
62
  const ast = parseToAST('<img src="test.jpg" alt="test"/>');
55
63
 
56
64
  const imgElement = ast.children![0]!;
57
65
  expect(imgElement.type).toBe(ASTNodeType.Element);
58
- expect(imgElement.tagName).toBe('img');
66
+ expect(imgElement.tagName).toBe("img");
59
67
  expect((imgElement as any).isSelfClosing).toBe(true);
60
68
  expect(imgElement.attributes).toEqual({
61
- src: 'test.jpg',
62
- alt: 'test'
69
+ src: "test.jpg",
70
+ alt: "test",
63
71
  });
64
72
  });
65
73
 
66
- it('should parse void elements correctly', () => {
74
+ it("should parse void elements correctly", () => {
67
75
  const ast = parseToAST('<br><hr><input type="text">');
68
76
 
69
77
  expect(ast.children).toHaveLength(3);
70
- expect(ast.children![0]!.tagName).toBe('br');
78
+ expect(ast.children![0]!.tagName).toBe("br");
71
79
  expect((ast.children![0]! as any).isSelfClosing).toBe(true);
72
- expect(ast.children![1]!.tagName).toBe('hr');
80
+ expect(ast.children![1]!.tagName).toBe("hr");
73
81
  expect((ast.children![1]! as any).isSelfClosing).toBe(true);
74
- expect(ast.children![2]!.tagName).toBe('input');
82
+ expect(ast.children![2]!.tagName).toBe("input");
75
83
  expect((ast.children![2]! as any).isSelfClosing).toBe(true);
76
84
  });
77
85
  });
78
86
 
79
- describe('Nested Elements', () => {
80
- it('should parse nested elements', () => {
81
- const ast = parseToAST('<div><p>Hello</p></div>');
87
+ describe("Nested Elements", () => {
88
+ it("should parse nested elements", () => {
89
+ const ast = parseToAST("<div><p>Hello</p></div>");
82
90
 
83
91
  const divElement = ast.children![0]!;
84
- expect(divElement.tagName).toBe('div');
92
+ expect(divElement.tagName).toBe("div");
85
93
  expect(divElement.children).toHaveLength(1);
86
94
 
87
95
  const pElement = divElement.children![0]!;
88
- expect(pElement.tagName).toBe('p');
96
+ expect(pElement.tagName).toBe("p");
89
97
  expect(pElement.children).toHaveLength(1);
90
98
 
91
99
  const textNode = pElement.children![0]!;
92
100
  expect(textNode.type).toBe(ASTNodeType.Text);
93
- expect((textNode as any).content).toBe('Hello');
101
+ expect((textNode as any).content).toBe("Hello");
94
102
  });
95
103
 
96
- it('should parse deeply nested elements', () => {
97
- const ast = parseToAST('<div><section><article><h1>Title</h1></article></section></div>');
104
+ it("should parse deeply nested elements", () => {
105
+ const ast = parseToAST(
106
+ "<div><section><article><h1>Title</h1></article></section></div>",
107
+ );
98
108
 
99
109
  const divElement = ast.children![0]!;
100
110
  const sectionElement = divElement.children![0]!;
101
111
  const articleElement = sectionElement.children![0]!;
102
112
  const h1Element = articleElement.children![0]!;
103
113
 
104
- expect(h1Element.tagName).toBe('h1');
105
- expect((h1Element.children![0]! as any).content).toBe('Title');
114
+ expect(h1Element.tagName).toBe("h1");
115
+ expect((h1Element.children![0]! as any).content).toBe("Title");
106
116
  });
107
117
 
108
- it('should handle multiple siblings', () => {
109
- const ast = parseToAST('<div><p>First</p><p>Second</p><p>Third</p></div>');
118
+ it("should handle multiple siblings", () => {
119
+ const ast = parseToAST(
120
+ "<div><p>First</p><p>Second</p><p>Third</p></div>",
121
+ );
110
122
 
111
123
  const divElement = ast.children![0]!;
112
124
  expect(divElement.children).toHaveLength(3);
113
125
 
114
- expect(divElement.children![0]!.tagName).toBe('p');
115
- expect((divElement.children![0]!.children![0] as any).content).toBe('First');
116
- expect((divElement.children![1]!.children![0] as any).content).toBe('Second');
117
- expect((divElement.children![2]!.children![0] as any).content).toBe('Third');
126
+ expect(divElement.children![0]!.tagName).toBe("p");
127
+ expect((divElement.children![0]!.children![0] as any).content).toBe(
128
+ "First",
129
+ );
130
+ expect((divElement.children![1]!.children![0] as any).content).toBe(
131
+ "Second",
132
+ );
133
+ expect((divElement.children![2]!.children![0] as any).content).toBe(
134
+ "Third",
135
+ );
118
136
  });
119
137
  });
120
138
 
121
- describe('Text Content', () => {
122
- it('should parse text content', () => {
123
- const ast = parseToAST('Hello World');
139
+ describe("Text Content", () => {
140
+ it("should parse text content", () => {
141
+ const ast = parseToAST("Hello World");
124
142
 
125
143
  expect(ast.children).toHaveLength(1);
126
144
  const textNode = ast.children![0]!;
127
145
  expect(textNode.type).toBe(ASTNodeType.Text);
128
- expect((textNode as any).content).toBe('Hello World');
146
+ expect((textNode as any).content).toBe("Hello World");
129
147
  });
130
148
 
131
- it('should parse mixed text and elements', () => {
132
- const ast = parseToAST('Before <strong>bold</strong> after');
149
+ it("should parse mixed text and elements", () => {
150
+ const ast = parseToAST("Before <strong>bold</strong> after");
133
151
 
134
152
  expect(ast.children).toHaveLength(3);
135
- expect((ast.children![0]! as any).content).toBe('Before ');
136
- expect(ast.children![1]!.tagName).toBe('strong');
137
- expect((ast.children![1]!.children![0]! as any).content).toBe('bold');
138
- expect((ast.children![2]! as any).content).toBe(' after');
153
+ expect((ast.children![0]! as any).content).toBe("Before ");
154
+ expect(ast.children![1]!.tagName).toBe("strong");
155
+ expect((ast.children![1]!.children![0]! as any).content).toBe("bold");
156
+ expect((ast.children![2]! as any).content).toBe(" after");
139
157
  });
140
158
 
141
- it('should handle entities in text', () => {
142
- const ast = parseToAST('<p>&amp; &lt; &gt;</p>');
159
+ it("should handle entities in text", () => {
160
+ const ast = parseToAST("<p>&amp; &lt; &gt;</p>");
143
161
 
144
162
  const pElement = ast.children![0]!;
145
163
  const textNode = pElement.children![0]!;
146
- expect((textNode as any).content).toBe('& < >');
164
+ expect((textNode as any).content).toBe("& < >");
147
165
  });
148
166
  });
149
167
 
150
- describe('Comments and Special Nodes', () => {
151
- it('should parse HTML comments', () => {
152
- const ast = parseToAST('<!-- This is a comment -->');
168
+ describe("Comments and Special Nodes", () => {
169
+ it("should parse HTML comments", () => {
170
+ const ast = parseToAST("<!-- This is a comment -->");
153
171
 
154
172
  expect(ast.children).toHaveLength(1);
155
173
  const commentNode = ast.children![0]!;
156
174
  expect(commentNode.type).toBe(ASTNodeType.Comment);
157
- expect((commentNode as any).content).toBe(' This is a comment ');
175
+ expect((commentNode as any).content).toBe(" This is a comment ");
158
176
  });
159
177
 
160
- it('should parse DOCTYPE', () => {
161
- const ast = parseToAST('<!DOCTYPE html>');
178
+ it("should parse DOCTYPE", () => {
179
+ const ast = parseToAST("<!DOCTYPE html>");
162
180
 
163
- const doctypeNode = ast.children?.find(c => c.type === ASTNodeType.Doctype);
181
+ const doctypeNode = ast.children?.find(
182
+ (c) => c.type === ASTNodeType.Doctype,
183
+ );
164
184
  expect(doctypeNode).toBeDefined();
165
- expect((doctypeNode as any).content).toBe('html');
166
- });
167
-
168
- it.skip('should parse CDATA sections', () => {
169
- const ast = parseToAST('<![CDATA[Some raw data]]>');
170
-
171
- expect(ast.children).toHaveLength(1);
172
- const cdataNode = ast.children![0]!;
173
- expect(cdataNode.type).toBe(ASTNodeType.CDATA);
174
- expect((cdataNode as any).content).toBe('Some raw data');
175
- });
176
-
177
- it.skip('should parse processing instructions', () => {
178
- const ast = parseToAST('<?xml version="1.0"?>');
179
-
180
- expect(ast.children).toHaveLength(1);
181
- const piNode = ast.children![0]!;
182
- expect(piNode.type).toBe('processing-instruction' as any);
183
- expect((piNode as any).content).toBe('<?xml version="1.0"');
185
+ expect((doctypeNode as any).content).toBe("html");
184
186
  });
185
187
  });
186
188
 
187
- describe('Complete HTML Documents', () => {
188
- it('should parse complete HTML document', () => {
189
+ describe("Complete HTML Documents", () => {
190
+ it("should parse complete HTML document", () => {
189
191
  const html = `<!DOCTYPE html>
190
192
  <html lang="en">
191
193
  <head>
@@ -204,96 +206,105 @@ describe('HTML Parser', () => {
204
206
  expect(ast.children!.length).toBeGreaterThan(1);
205
207
 
206
208
  const htmlElement = ast.children!.find(
207
- child => child.type === ASTNodeType.Element && child.tagName === 'html'
209
+ (child) =>
210
+ child.type === ASTNodeType.Element && child.tagName === "html",
208
211
  )!;
209
212
 
210
213
  expect(htmlElement).toBeDefined();
211
- expect(htmlElement.attributes!.lang).toBe('en');
214
+ expect(htmlElement.attributes!.lang).toBe("en");
212
215
 
213
216
  const elementChildren = htmlElement.children!.filter(
214
- child => child.type === ASTNodeType.Element
217
+ (child) => child.type === ASTNodeType.Element,
215
218
  );
216
219
  expect(elementChildren).toHaveLength(2);
217
220
 
218
- const headElement = elementChildren.find(child => child.tagName === 'head')!;
219
- const bodyElement = elementChildren.find(child => child.tagName === 'body')!;
221
+ const headElement = elementChildren.find(
222
+ (child) => child.tagName === "head",
223
+ )!;
224
+ const bodyElement = elementChildren.find(
225
+ (child) => child.tagName === "body",
226
+ )!;
220
227
 
221
228
  expect(headElement).toBeDefined();
222
229
  expect(bodyElement).toBeDefined();
223
230
  });
224
231
  });
225
232
 
226
- describe('real web scenarios', () => {
227
- it('should parse real-world HTML', async () => {
233
+ describe("real web scenarios", () => {
234
+ it("should parse real-world HTML", async () => {
228
235
  const html = await file("./tests/test-page-0.txt").text();
229
236
  const ast = parseToAST(html);
230
237
  });
231
238
  });
232
239
 
233
- describe('Error Recovery', () => {
234
- it('should handle unclosed tags', () => {
235
- const ast = parseToAST('<div><p>Unclosed paragraph</div>');
240
+ describe("Error Recovery", () => {
241
+ it("should handle unclosed tags", () => {
242
+ const ast = parseToAST("<div><p>Unclosed paragraph</div>");
236
243
 
237
244
  const divElement = ast.children![0]!;
238
- expect(divElement.tagName).toBe('div');
245
+ expect(divElement.tagName).toBe("div");
239
246
 
240
247
  const pElement = divElement.children![0]!;
241
- expect(pElement.tagName).toBe('p');
248
+ expect(pElement.tagName).toBe("p");
242
249
  });
243
250
 
244
- it('should handle unexpected closing tags', () => {
245
- const ast = parseToAST('<div></span></div>');
251
+ it("should handle unexpected closing tags", () => {
252
+ const ast = parseToAST("<div></span></div>");
246
253
 
247
254
  const divElement = ast.children![0]!;
248
- expect(divElement.tagName).toBe('div');
255
+ expect(divElement.tagName).toBe("div");
249
256
  });
250
257
 
251
- it('should handle malformed attributes', () => {
258
+ it("should handle malformed attributes", () => {
252
259
  const ast = parseToAST('<div class="test id="main">Content</div>');
253
260
 
254
261
  const divElement = ast.children![0]!;
255
- expect(divElement.tagName).toBe('div');
262
+ expect(divElement.tagName).toBe("div");
256
263
  expect(divElement.attributes).toBeDefined();
257
264
  });
258
265
  });
259
266
 
260
- describe('Auto-closing Tags', () => {
261
- it('should auto-close list items', () => {
262
- const ast = parseToAST('<ul><li>First<li>Second</ul>');
267
+ describe("Auto-closing Tags", () => {
268
+ it("should auto-close list items", () => {
269
+ const ast = parseToAST("<ul><li>First<li>Second</ul>");
263
270
 
264
271
  const ulElement = ast.children![0]!;
265
272
  const liElements = ulElement.children!.filter(
266
- child => child.type === ASTNodeType.Element && child.tagName === 'li'
273
+ (child) => child.type === ASTNodeType.Element && child.tagName === "li",
267
274
  );
268
275
 
269
276
  expect(liElements).toHaveLength(2);
270
- expect((liElements[0]!.children![0]! as any).content).toBe('First');
271
- expect((liElements[1]!.children![0]! as any).content).toBe('Second');
277
+ expect((liElements[0]!.children![0]! as any).content).toBe("First");
278
+ expect((liElements[1]!.children![0]! as any).content).toBe("Second");
272
279
  });
273
280
 
274
- it('should auto-close paragraph tags', () => {
275
- const ast = parseToAST('<p>First paragraph<p>Second paragraph');
281
+ it("should auto-close paragraph tags", () => {
282
+ const ast = parseToAST("<p>First paragraph<p>Second paragraph");
276
283
 
277
284
  const pElements = ast.children!.filter(
278
- child => child.type === ASTNodeType.Element && child.tagName === 'p'
285
+ (child) => child.type === ASTNodeType.Element && child.tagName === "p",
279
286
  );
280
287
 
281
288
  expect(pElements).toHaveLength(2);
282
- expect((pElements[0]!.children![0]! as any).content).toBe('First paragraph');
283
- expect((pElements[1]!.children![0]! as any).content).toBe('Second paragraph');
289
+ expect((pElements[0]!.children![0]! as any).content).toBe(
290
+ "First paragraph",
291
+ );
292
+ expect((pElements[1]!.children![0]! as any).content).toBe(
293
+ "Second paragraph",
294
+ );
284
295
  });
285
296
  });
286
297
 
287
- describe('Whitespace Handling', () => {
288
- it('should preserve significant whitespace', () => {
289
- const ast = parseToAST('<p> Hello World </p>');
298
+ describe("Whitespace Handling", () => {
299
+ it("should preserve significant whitespace", () => {
300
+ const ast = parseToAST("<p> Hello World </p>");
290
301
 
291
302
  const pElement = ast.children![0]!;
292
303
  const textNode = pElement.children![0]!;
293
- expect((textNode as any).content).toBe(' Hello World ');
304
+ expect((textNode as any).content).toBe(" Hello World ");
294
305
  });
295
306
 
296
- it('should skip insignificant whitespace', () => {
307
+ it("should skip insignificant whitespace", () => {
297
308
  const ast = parseToAST(`<html>
298
309
  <head>
299
310
  <title>Test</title>
@@ -301,11 +312,13 @@ describe('HTML Parser', () => {
301
312
  </html>`);
302
313
 
303
314
  const htmlElement = ast.children!.find(
304
- child => child.type === ASTNodeType.Element && child.tagName === 'html'
315
+ (child) =>
316
+ child.type === ASTNodeType.Element && child.tagName === "html",
305
317
  )!;
306
318
 
307
319
  const headElement = htmlElement.children!.find(
308
- child => child.type === ASTNodeType.Element && child.tagName === 'head'
320
+ (child) =>
321
+ child.type === ASTNodeType.Element && child.tagName === "head",
309
322
  )!;
310
323
 
311
324
  expect(headElement).toBeDefined();
@@ -313,158 +326,185 @@ describe('HTML Parser', () => {
313
326
  });
314
327
 
315
328
  describe("complete web page", () => {
316
- it('should parse a complete web page', async () => {
329
+ it("should parse a complete web page", async () => {
317
330
  const html = await file("./tests/test-page-0.txt").text();
318
331
  const ast = parseToAST(html);
319
332
  expect(ast.children!.length).toBeGreaterThanOrEqual(1);
320
333
  const htmlElement = ast.children!.find(
321
- child => child.type === ASTNodeType.Element && child.tagName === 'html'
334
+ (child) =>
335
+ child.type === ASTNodeType.Element && child.tagName === "html",
322
336
  )!;
323
337
  expect(htmlElement).toBeDefined();
324
338
  expect(htmlElement.type).toBe(ASTNodeType.Element);
325
- expect(htmlElement.tagName).toBe('html');
326
- expect(htmlElement.attributes!.lang).toBe('en');
339
+ expect(htmlElement.tagName).toBe("html");
340
+ expect(htmlElement.attributes!.lang).toBe("en");
327
341
  const headElement = htmlElement.children!.find(
328
- child => child.type === ASTNodeType.Element && child.tagName === 'head'
342
+ (child) =>
343
+ child.type === ASTNodeType.Element && child.tagName === "head",
329
344
  )!;
330
345
  const bodyElement = htmlElement.children!.find(
331
- child => child.type === ASTNodeType.Element && child.tagName === 'body'
346
+ (child) =>
347
+ child.type === ASTNodeType.Element && child.tagName === "body",
332
348
  )!;
333
349
  expect(headElement).toBeDefined();
334
350
  expect(bodyElement).toBeDefined();
335
- })
336
- })
351
+ });
352
+ });
337
353
 
338
- describe('Advanced Edge Cases', () => {
339
- it('should handle empty attributes', () => {
354
+ describe("Advanced Edge Cases", () => {
355
+ it("should handle empty attributes", () => {
340
356
  const ast = parseToAST('<input disabled checked="" value="">');
341
357
  const inputElement = ast.children![0]!;
342
358
  expect(inputElement.attributes).toEqual({
343
- disabled: '',
344
- checked: '',
345
- value: ''
359
+ disabled: "",
360
+ checked: "",
361
+ value: "",
346
362
  });
347
363
  });
348
364
 
349
- it('should handle attributes with special characters', () => {
350
- const ast = parseToAST('<div data-test="hello-world" class="my_class-123">');
365
+ it("should handle attributes with special characters", () => {
366
+ const ast = parseToAST(
367
+ '<div data-test="hello-world" class="my_class-123">',
368
+ );
351
369
  const divElement = ast.children![0]!;
352
370
  expect(divElement.attributes).toEqual({
353
- 'data-test': 'hello-world',
354
- 'class': 'my_class-123'
371
+ "data-test": "hello-world",
372
+ class: "my_class-123",
355
373
  });
356
374
  });
357
375
 
358
- it('should handle mixed quotes in attributes', () => {
359
- const ast = parseToAST(`<div title='He said "Hello"' data-info="She's here">`);
376
+ it("should handle mixed quotes in attributes", () => {
377
+ const ast = parseToAST(
378
+ `<div title='He said "Hello"' data-info="She's here">`,
379
+ );
360
380
  const divElement = ast.children![0]!;
361
381
  expect(divElement.attributes!.title).toBe('He said "Hello"');
362
- expect(divElement.attributes!['data-info']).toBe("She's here");
382
+ expect(divElement.attributes!["data-info"]).toBe("She's here");
363
383
  });
364
384
 
365
- it('should handle deeply nested comments', () => {
366
- const ast = parseToAST('<div><!-- Outer <!-- Inner --> comment --></div>');
385
+ it("should handle deeply nested comments", () => {
386
+ const ast = parseToAST(
387
+ "<div><!-- Outer <!-- Inner --> comment --></div>",
388
+ );
367
389
  const divElement = ast.children![0]!;
368
390
  expect(divElement.children!.length).toBeGreaterThanOrEqual(1);
369
391
  expect(divElement.children![0]!.type).toBe(ASTNodeType.Comment);
370
392
  });
371
393
 
372
- it('should handle multiple consecutive whitespace', () => {
373
- const ast = parseToAST('<p> \n\t Hello \n\t World \n\t </p>');
394
+ it("should handle multiple consecutive whitespace", () => {
395
+ const ast = parseToAST(
396
+ "<p> \n\t Hello \n\t World \n\t </p>",
397
+ );
374
398
  const pElement = ast.children![0]!;
375
399
  const textNode = pElement.children![0]!;
376
- expect((textNode as any).content).toContain('Hello');
377
- expect((textNode as any).content).toContain('World');
400
+ expect((textNode as any).content).toContain("Hello");
401
+ expect((textNode as any).content).toContain("World");
378
402
  });
379
403
 
380
- it('should handle malformed nested tags', () => {
381
- const ast = parseToAST('<div><p><span>Text</div></span></p>');
404
+ it("should handle malformed nested tags", () => {
405
+ const ast = parseToAST("<div><p><span>Text</div></span></p>");
382
406
  const divElement = ast.children![0]!;
383
- expect(divElement.tagName).toBe('div');
407
+ expect(divElement.tagName).toBe("div");
384
408
  expect(divElement.children!.length).toBeGreaterThan(0);
385
409
  });
386
410
 
387
- it('should handle orphaned closing tags', () => {
388
- const ast = parseToAST('</div><p>Content</p></span>');
411
+ it("should handle orphaned closing tags", () => {
412
+ const ast = parseToAST("</div><p>Content</p></span>");
389
413
  const pElement = ast.children!.find(
390
- child => child.type === ASTNodeType.Element && child.tagName === 'p'
414
+ (child) => child.type === ASTNodeType.Element && child.tagName === "p",
391
415
  )!;
392
416
  expect(pElement).toBeDefined();
393
- expect((pElement.children![0]! as any).content).toBe('Content');
417
+ expect((pElement.children![0]! as any).content).toBe("Content");
394
418
  });
395
419
 
396
- it('should handle extreme nesting depth', () => {
397
- let html = '';
420
+ it("should handle extreme nesting depth", () => {
421
+ let html = "";
398
422
  const depth = 50;
399
423
  for (let i = 0; i < depth; i++) {
400
424
  html += `<div level="${i}">`;
401
425
  }
402
- html += 'Deep content';
426
+ html += "Deep content";
403
427
  for (let i = 0; i < depth; i++) {
404
- html += '</div>';
428
+ html += "</div>";
405
429
  }
406
430
  const ast = parseToAST(html);
407
431
  let current = ast.children![0]!;
408
432
  for (let i = 0; i < depth - 1; i++) {
409
- expect(current.tagName).toBe('div');
433
+ expect(current.tagName).toBe("div");
410
434
  expect(current.attributes!.level).toBe(i.toString());
411
- current = current.children!.find(child => child.type === ASTNodeType.Element)!;
435
+ current = current.children!.find(
436
+ (child) => child.type === ASTNodeType.Element,
437
+ )!;
412
438
  }
413
- const textNode = current.children!.find(child => child.type === ASTNodeType.Text)!;
414
- expect((textNode as any).content).toBe('Deep content');
439
+ const textNode = current.children!.find(
440
+ (child) => child.type === ASTNodeType.Text,
441
+ )!;
442
+ expect((textNode as any).content).toBe("Deep content");
415
443
  });
416
- })
444
+ });
417
445
 
418
- describe('Complex Entity Handling', () => {
419
- it('should handle numeric character references', () => {
420
- const ast = parseToAST('<p>&#65; &#8364; &#x41; &#x20AC;</p>');
446
+ describe("Complex Entity Handling", () => {
447
+ it("should handle numeric character references", () => {
448
+ const ast = parseToAST("<p>&#65; &#8364; &#x41; &#x20AC;</p>");
421
449
  const pElement = ast.children![0]!;
422
450
  const textNode = pElement.children![0]!;
423
- expect((textNode as any).content).toBe('A € A €');
451
+ expect((textNode as any).content).toBe("A € A €");
424
452
  });
425
453
 
426
- it('should handle mixed entities and text', () => {
427
- const ast = parseToAST('<p>R&amp;D &lt;testing&gt; &quot;quotes&quot; &apos;apostrophe&apos;</p>');
454
+ it("should handle mixed entities and text", () => {
455
+ const ast = parseToAST(
456
+ "<p>R&amp;D &lt;testing&gt; &quot;quotes&quot; &apos;apostrophe&apos;</p>",
457
+ );
428
458
  const pElement = ast.children![0]!;
429
459
  const textNode = pElement.children![0]!;
430
- expect((textNode as any).content).toBe('R&D <testing> "quotes" \'apostrophe\'');
460
+ expect((textNode as any).content).toBe(
461
+ "R&D <testing> \"quotes\" 'apostrophe'",
462
+ );
431
463
  });
432
464
 
433
- it('should handle entities in attributes', () => {
434
- const ast = parseToAST('<div title="R&amp;D &lt;section&gt;" data-test="&quot;hello&quot;">');
465
+ it("should handle entities in attributes", () => {
466
+ const ast = parseToAST(
467
+ '<div title="R&amp;D &lt;section&gt;" data-test="&quot;hello&quot;">',
468
+ );
435
469
  const divElement = ast.children![0]!;
436
- expect(divElement.attributes!.title).toBe('R&D <section>');
437
- expect(divElement.attributes!['data-test']).toBe('"hello"');
470
+ expect(divElement.attributes!.title).toBe("R&D <section>");
471
+ expect(divElement.attributes!["data-test"]).toBe('"hello"');
438
472
  });
439
- })
473
+ });
440
474
 
441
- describe('DOM-like Functionality Tests', () => {
442
- it('should maintain parent-child relationships', () => {
443
- const ast = parseToAST('<div><section><article><h1>Title</h1><p>Content</p></article></section></div>');
475
+ describe("DOM-like Functionality Tests", () => {
476
+ it("should maintain parent-child relationships", () => {
477
+ const ast = parseToAST(
478
+ "<div><section><article><h1>Title</h1><p>Content</p></article></section></div>",
479
+ );
444
480
  const divElement = ast.children![0]!;
445
481
  const sectionElement = divElement.children![0]!;
446
482
  const articleElement = sectionElement.children![0]!;
447
483
  expect(articleElement.children).toHaveLength(2);
448
- expect(articleElement.children![0]!.tagName).toBe('h1');
449
- expect(articleElement.children![1]!.tagName).toBe('p');
484
+ expect(articleElement.children![0]!.tagName).toBe("h1");
485
+ expect(articleElement.children![1]!.tagName).toBe("p");
450
486
  });
451
487
 
452
- it('should handle sibling navigation scenarios', () => {
453
- const ast = parseToAST('<nav><a href="#home">Home</a><a href="#about">About</a><a href="#contact">Contact</a></nav>');
488
+ it("should handle sibling navigation scenarios", () => {
489
+ const ast = parseToAST(
490
+ '<nav><a href="#home">Home</a><a href="#about">About</a><a href="#contact">Contact</a></nav>',
491
+ );
454
492
  const navElement = ast.children![0]!;
455
- const links = navElement.children!.filter(child => child.type === ASTNodeType.Element);
493
+ const links = navElement.children!.filter(
494
+ (child) => child.type === ASTNodeType.Element,
495
+ );
456
496
  expect(links).toHaveLength(3);
457
497
  links.forEach((link, index) => {
458
- expect(link.tagName).toBe('a');
498
+ expect(link.tagName).toBe("a");
459
499
  expect(link.attributes!.href).toBeDefined();
460
500
  expect(link.children![0]!.type).toBe(ASTNodeType.Text);
461
501
  });
462
- expect((links[0]!.children![0]! as any).content).toBe('Home');
463
- expect((links[1]!.children![0]! as any).content).toBe('About');
464
- expect((links[2]!.children![0]! as any).content).toBe('Contact');
502
+ expect((links[0]!.children![0]! as any).content).toBe("Home");
503
+ expect((links[1]!.children![0]! as any).content).toBe("About");
504
+ expect((links[2]!.children![0]! as any).content).toBe("Contact");
465
505
  });
466
506
 
467
- it('should handle form elements with all attribute types', () => {
507
+ it("should handle form elements with all attribute types", () => {
468
508
  const ast = parseToAST(`
469
509
  <form action="/submit" method="post" enctype="multipart/form-data">
470
510
  <input type="text" name="username" required placeholder="Enter username" maxlength="50">
@@ -481,13 +521,17 @@ describe('HTML Parser', () => {
481
521
  <button type="submit" disabled>Submit</button>
482
522
  </form>
483
523
  `);
484
- const formElement = ast.children!.find(child => child.tagName === 'form')!;
485
- expect(formElement.attributes!.action).toBe('/submit');
486
- expect(formElement.attributes!.method).toBe('post');
524
+ const formElement = ast.children!.find(
525
+ (child) => child.tagName === "form",
526
+ )!;
527
+ expect(formElement.attributes!.action).toBe("/submit");
528
+ expect(formElement.attributes!.method).toBe("post");
487
529
  const inputs: ASTNode[] = [];
488
530
  const traverse = (node: ASTNode) => {
489
531
  if (node.type === ASTNodeType.Element) {
490
- if (['input', 'select', 'textarea', 'button'].includes(node.tagName!)) {
532
+ if (
533
+ ["input", "select", "textarea", "button"].includes(node.tagName!)
534
+ ) {
491
535
  inputs.push(node);
492
536
  }
493
537
  }
@@ -497,14 +541,16 @@ describe('HTML Parser', () => {
497
541
  };
498
542
  traverse(formElement);
499
543
  expect(inputs.length).toBeGreaterThan(5);
500
- const usernameInput = inputs.find(input => input.attributes?.name === 'username');
501
- expect(usernameInput!.attributes!.required).toBe('');
502
- expect(usernameInput!.attributes!.placeholder).toBe('Enter username');
503
- const selectElement = inputs.find(input => input.tagName === 'select');
504
- expect(selectElement!.attributes!.multiple).toBe('');
544
+ const usernameInput = inputs.find(
545
+ (input) => input.attributes?.name === "username",
546
+ );
547
+ expect(usernameInput!.attributes!.required).toBe("");
548
+ expect(usernameInput!.attributes!.placeholder).toBe("Enter username");
549
+ const selectElement = inputs.find((input) => input.tagName === "select");
550
+ expect(selectElement!.attributes!.multiple).toBe("");
505
551
  });
506
552
 
507
- it('should handle table structures correctly', () => {
553
+ it("should handle table structures correctly", () => {
508
554
  const ast = parseToAST(`
509
555
  <table border="1" cellpadding="5" cellspacing="0">
510
556
  <thead>
@@ -528,14 +574,20 @@ describe('HTML Parser', () => {
528
574
  </tbody>
529
575
  </table>
530
576
  `);
531
- const tableElement = ast.children!.find(child => child.tagName === 'table')!;
532
- const thead = tableElement.children!.find(child => child.tagName === 'thead');
533
- const tbody = tableElement.children!.find(child => child.tagName === 'tbody');
577
+ const tableElement = ast.children!.find(
578
+ (child) => child.tagName === "table",
579
+ )!;
580
+ const thead = tableElement.children!.find(
581
+ (child) => child.tagName === "thead",
582
+ );
583
+ const tbody = tableElement.children!.find(
584
+ (child) => child.tagName === "tbody",
585
+ );
534
586
  expect(thead).toBeDefined();
535
587
  expect(tbody).toBeDefined();
536
588
  const rows: ASTNode[] = [];
537
589
  const traverse = (node: ASTNode) => {
538
- if (node.tagName === 'tr') {
590
+ if (node.tagName === "tr") {
539
591
  rows.push(node);
540
592
  }
541
593
  if (node.children) {
@@ -546,13 +598,13 @@ describe('HTML Parser', () => {
546
598
  expect(rows).toHaveLength(3);
547
599
  });
548
600
 
549
- it('should handle mixed content with inline elements', () => {
601
+ it("should handle mixed content with inline elements", () => {
550
602
  const ast = parseToAST(`
551
603
  <p>This is <strong>bold text</strong> and this is <em>italic text</em>.
552
604
  Here's a <a href="https://example.com" target="_blank">link</a> and some
553
605
  <code>inline code</code>. Also <span class="highlight">highlighted text</span>.</p>
554
606
  `);
555
- const pElement = ast.children!.find(child => child.tagName === 'p')!;
607
+ const pElement = ast.children!.find((child) => child.tagName === "p")!;
556
608
  let textNodes = 0;
557
609
  let elementNodes = 0;
558
610
  let totalChildren = 0;
@@ -574,7 +626,7 @@ describe('HTML Parser', () => {
574
626
  expect(textNodes).toBeGreaterThan(0);
575
627
  });
576
628
 
577
- it('should preserve document structure integrity', () => {
629
+ it("should preserve document structure integrity", () => {
578
630
  const ast = parseToAST(`<!DOCTYPE html>
579
631
  <html lang="en">
580
632
  <head>
@@ -600,23 +652,35 @@ describe('HTML Parser', () => {
600
652
  </footer>
601
653
  </body>
602
654
  </html>`);
603
- const doctype = ast.children!.find(child => child.type === ASTNodeType.Doctype);
655
+ const doctype = ast.children!.find(
656
+ (child) => child.type === ASTNodeType.Doctype,
657
+ );
604
658
  expect(doctype).toBeDefined();
605
- const htmlElement = ast.children!.find(child => child.tagName === 'html')!;
606
- expect(htmlElement.attributes!.lang).toBe('en');
607
- const headElement = htmlElement.children!.find(child => child.tagName === 'head');
608
- const bodyElement = htmlElement.children!.find(child => child.tagName === 'body');
659
+ const htmlElement = ast.children!.find(
660
+ (child) => child.tagName === "html",
661
+ )!;
662
+ expect(htmlElement.attributes!.lang).toBe("en");
663
+ const headElement = htmlElement.children!.find(
664
+ (child) => child.tagName === "head",
665
+ );
666
+ const bodyElement = htmlElement.children!.find(
667
+ (child) => child.tagName === "body",
668
+ );
609
669
  expect(headElement).toBeDefined();
610
670
  expect(bodyElement).toBeDefined();
611
- const headerElement = bodyElement!.children!.find(child => child.tagName === 'header');
612
- const mainElement = bodyElement!.children!.find(child => child.tagName === 'main');
613
- const footerElement = bodyElement!.children!.find(child => child.tagName === 'footer');
671
+ const headerElement = bodyElement!.children!.find(
672
+ (child) => child.tagName === "header",
673
+ );
674
+ const mainElement = bodyElement!.children!.find(
675
+ (child) => child.tagName === "main",
676
+ );
677
+ const footerElement = bodyElement!.children!.find(
678
+ (child) => child.tagName === "footer",
679
+ );
614
680
  expect(headerElement).toBeDefined();
615
681
  expect(mainElement).toBeDefined();
616
682
  expect(footerElement).toBeDefined();
617
- expect(headerElement!.attributes!.id).toBe('main-header');
683
+ expect(headerElement!.attributes!.id).toBe("main-header");
618
684
  });
619
- })
620
-
621
-
622
- });
685
+ });
686
+ });