@tkeron/html-parser 1.1.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. package/.github/workflows/npm_deploy.yml +14 -4
  2. package/README.md +6 -6
  3. package/bun.lock +6 -8
  4. package/check-versions.ts +147 -0
  5. package/index.ts +4 -8
  6. package/package.json +5 -6
  7. package/src/dom-simulator/append-child.ts +130 -0
  8. package/src/dom-simulator/append.ts +18 -0
  9. package/src/dom-simulator/attributes.ts +23 -0
  10. package/src/dom-simulator/clone-node.ts +51 -0
  11. package/src/dom-simulator/convert-ast-node-to-dom.ts +37 -0
  12. package/src/dom-simulator/create-cdata.ts +18 -0
  13. package/src/dom-simulator/create-comment.ts +23 -0
  14. package/src/dom-simulator/create-doctype.ts +24 -0
  15. package/src/dom-simulator/create-document.ts +81 -0
  16. package/src/dom-simulator/create-element.ts +195 -0
  17. package/src/dom-simulator/create-processing-instruction.ts +19 -0
  18. package/src/dom-simulator/create-temp-parent.ts +9 -0
  19. package/src/dom-simulator/create-text-node.ts +23 -0
  20. package/src/dom-simulator/escape-text-content.ts +6 -0
  21. package/src/dom-simulator/find-special-elements.ts +14 -0
  22. package/src/dom-simulator/get-text-content.ts +18 -0
  23. package/src/dom-simulator/index.ts +36 -0
  24. package/src/dom-simulator/inner-outer-html.ts +182 -0
  25. package/src/dom-simulator/insert-after.ts +20 -0
  26. package/src/dom-simulator/insert-before.ts +108 -0
  27. package/src/dom-simulator/matches.ts +26 -0
  28. package/src/dom-simulator/node-types.ts +26 -0
  29. package/src/dom-simulator/prepend.ts +24 -0
  30. package/src/dom-simulator/remove-child.ts +68 -0
  31. package/src/dom-simulator/remove.ts +7 -0
  32. package/src/dom-simulator/replace-child.ts +152 -0
  33. package/src/dom-simulator/set-text-content.ts +33 -0
  34. package/src/dom-simulator/update-element-content.ts +56 -0
  35. package/src/dom-simulator.ts +12 -1126
  36. package/src/encoding/constants.ts +8 -0
  37. package/src/encoding/detect-encoding.ts +21 -0
  38. package/src/encoding/index.ts +1 -0
  39. package/src/encoding/normalize-encoding.ts +6 -0
  40. package/src/html-entities.ts +2127 -0
  41. package/src/index.ts +5 -5
  42. package/src/parser/adoption-agency-helpers.ts +145 -0
  43. package/src/parser/constants.ts +137 -0
  44. package/src/parser/dom-to-ast.ts +79 -0
  45. package/src/parser/index.ts +9 -0
  46. package/src/parser/parse.ts +772 -0
  47. package/src/parser/types.ts +56 -0
  48. package/src/selectors/find-elements-descendant.ts +47 -0
  49. package/src/selectors/index.ts +2 -0
  50. package/src/selectors/matches-selector.ts +12 -0
  51. package/src/selectors/matches-token.ts +27 -0
  52. package/src/selectors/parse-selector.ts +48 -0
  53. package/src/selectors/query-selector-all.ts +43 -0
  54. package/src/selectors/query-selector.ts +6 -0
  55. package/src/selectors/types.ts +10 -0
  56. package/src/serializer/attributes.ts +74 -0
  57. package/src/serializer/escape.ts +13 -0
  58. package/src/serializer/index.ts +1 -0
  59. package/src/serializer/serialize-tokens.ts +511 -0
  60. package/src/tokenizer/calculate-position.ts +10 -0
  61. package/src/tokenizer/constants.ts +11 -0
  62. package/src/tokenizer/decode-entities.ts +64 -0
  63. package/src/tokenizer/index.ts +2 -0
  64. package/src/tokenizer/parse-attributes.ts +74 -0
  65. package/src/tokenizer/tokenize.ts +165 -0
  66. package/src/tokenizer/types.ts +25 -0
  67. package/tests/adoption-agency-helpers.test.ts +304 -0
  68. package/tests/advanced.test.ts +242 -221
  69. package/tests/cloneNode.test.ts +19 -66
  70. package/tests/custom-elements-head.test.ts +54 -55
  71. package/tests/dom-extended.test.ts +77 -64
  72. package/tests/dom-manipulation.test.ts +51 -24
  73. package/tests/dom.test.ts +15 -13
  74. package/tests/encoding/detect-encoding.test.ts +33 -0
  75. package/tests/google-dom.test.ts +2 -2
  76. package/tests/helpers/tokenizer-adapter.test.ts +29 -43
  77. package/tests/helpers/tokenizer-adapter.ts +36 -33
  78. package/tests/helpers/tree-adapter.test.ts +20 -20
  79. package/tests/helpers/tree-adapter.ts +34 -24
  80. package/tests/html-entities-text.test.ts +6 -2
  81. package/tests/innerhtml-void-elements.test.ts +52 -36
  82. package/tests/outerHTML-replacement.test.ts +37 -65
  83. package/tests/parser/dom-to-ast.test.ts +109 -0
  84. package/tests/parser/parse.test.ts +139 -0
  85. package/tests/parser.test.ts +281 -217
  86. package/tests/selectors/query-selector-all.test.ts +39 -0
  87. package/tests/selectors/query-selector.test.ts +42 -0
  88. package/tests/serializer/attributes.test.ts +132 -0
  89. package/tests/serializer/escape.test.ts +51 -0
  90. package/tests/serializer/serialize-tokens.test.ts +80 -0
  91. package/tests/serializer-core.test.ts +6 -6
  92. package/tests/serializer-injectmeta.test.ts +6 -6
  93. package/tests/serializer-optionaltags.test.ts +9 -6
  94. package/tests/serializer-options.test.ts +6 -6
  95. package/tests/serializer-whitespace.test.ts +6 -6
  96. package/tests/tokenizer/calculate-position.test.ts +34 -0
  97. package/tests/tokenizer/decode-entities.test.ts +31 -0
  98. package/tests/tokenizer/parse-attributes.test.ts +44 -0
  99. package/tests/tokenizer/tokenize.test.ts +757 -0
  100. package/tests/tokenizer-namedEntities.test.ts +10 -7
  101. package/tests/tokenizer-pendingSpecChanges.test.ts +10 -7
  102. package/tests/tokenizer.test.ts +268 -256
  103. package/tests/tree-construction-adoption01.test.ts +25 -16
  104. package/tests/tree-construction-adoption02.test.ts +30 -19
  105. package/tests/tree-construction-domjs-unsafe.test.ts +6 -4
  106. package/tests/tree-construction-entities02.test.ts +18 -16
  107. package/tests/tree-construction-html5test-com.test.ts +16 -10
  108. package/tests/tree-construction-math.test.ts +11 -9
  109. package/tests/tree-construction-namespace-sensitivity.test.ts +11 -9
  110. package/tests/tree-construction-noscript01.test.ts +11 -9
  111. package/tests/tree-construction-ruby.test.ts +6 -4
  112. package/tests/tree-construction-scriptdata01.test.ts +6 -4
  113. package/tests/tree-construction-svg.test.ts +6 -4
  114. package/tests/tree-construction-template.test.ts +6 -4
  115. package/tests/tree-construction-tests10.test.ts +6 -4
  116. package/tests/tree-construction-tests11.test.ts +6 -4
  117. package/tests/tree-construction-tests20.test.ts +7 -4
  118. package/tests/tree-construction-tests21.test.ts +7 -4
  119. package/tests/tree-construction-tests23.test.ts +7 -4
  120. package/tests/tree-construction-tests24.test.ts +7 -4
  121. package/tests/tree-construction-tests5.test.ts +6 -5
  122. package/tests/tree-construction-tests6.test.ts +6 -5
  123. package/tests/tree-construction-tests_innerHTML_1.test.ts +6 -5
  124. package/tests/void-elements.test.ts +85 -40
  125. package/tsconfig.json +1 -1
  126. package/src/css-selector.ts +0 -185
  127. package/src/encoding.ts +0 -39
  128. package/src/parser.ts +0 -682
  129. package/src/serializer.ts +0 -450
  130. package/src/tokenizer.ts +0 -325
  131. package/tests/selectors.test.ts +0 -128
@@ -1,21 +1,29 @@
1
1
  // @ts-nocheck
2
- import { expect, test, describe, it } from 'bun:test';
3
- import { tokenize, TokenType } from '../src/tokenizer';
4
- import { parse, ASTNodeType, domToAST, type ASTNode } from '../src/parser';
2
+ import { expect, test, describe, it } from "bun:test";
3
+ import { tokenize, TokenType } from "../src/tokenizer/index.js";
4
+ import {
5
+ parse,
6
+ ASTNodeType,
7
+ domToAST,
8
+ type ASTNode,
9
+ } from "../src/parser/index";
5
10
 
6
11
  function parseToAST(html: string): ASTNode {
7
12
  const tokens = tokenize(html);
8
13
  const dom = parse(tokens);
9
14
  const ast = domToAST(dom);
10
-
11
- const hasExplicitHtml = html.includes('<html') || html.includes('<!DOCTYPE') || html.includes('<!doctype');
15
+
16
+ const hasExplicitHtml =
17
+ html.includes("<html") ||
18
+ html.includes("<!DOCTYPE") ||
19
+ html.includes("<!doctype");
12
20
  if (hasExplicitHtml) {
13
21
  return ast;
14
22
  }
15
-
16
- const htmlEl = ast.children?.find(c => c.tagName === 'html');
23
+
24
+ const htmlEl = ast.children?.find((c) => c.tagName === "html");
17
25
  if (htmlEl) {
18
- const bodyEl = htmlEl.children?.find(c => c.tagName === 'body');
26
+ const bodyEl = htmlEl.children?.find((c) => c.tagName === "body");
19
27
  if (bodyEl && bodyEl.children) {
20
28
  return { type: ASTNodeType.Document, children: bodyEl.children };
21
29
  }
@@ -23,44 +31,47 @@ function parseToAST(html: string): ASTNode {
23
31
  return ast;
24
32
  }
25
33
 
26
- describe('HTML Parser & Tokenizer - Advanced Tests', () => {
27
-
28
- describe('Tokenizer Edge Cases', () => {
29
- it('should handle attributes with no spaces', () => {
34
+ describe("HTML Parser & Tokenizer - Advanced Tests", () => {
35
+ describe("Tokenizer Edge Cases", () => {
36
+ it("should handle attributes with no spaces", () => {
30
37
  const tokens = tokenize('<div class="test"id="main"data-value="123">');
31
38
  expect(tokens.length).toBeGreaterThan(0);
32
39
  const tag = tokens[0]!;
33
-
40
+
34
41
  expect(tag.attributes).toEqual({
35
- class: 'test',
36
- id: 'main',
37
- 'data-value': '123'
42
+ class: "test",
43
+ id: "main",
44
+ "data-value": "123",
38
45
  });
39
46
  });
40
47
 
41
- it('should handle mixed quote styles', () => {
42
- const tokens = tokenize(`<div class='single' id="double" data-test='mix "quoted" content'>`);
48
+ it("should handle mixed quote styles", () => {
49
+ const tokens = tokenize(
50
+ `<div class='single' id="double" data-test='mix "quoted" content'>`,
51
+ );
43
52
  expect(tokens.length).toBeGreaterThan(0);
44
53
  const tag = tokens[0]!;
45
-
46
- expect(tag.attributes!.class).toBe('single');
47
- expect(tag.attributes!.id).toBe('double');
48
- expect(tag.attributes!['data-test']).toBe('mix "quoted" content');
54
+
55
+ expect(tag.attributes!.class).toBe("single");
56
+ expect(tag.attributes!.id).toBe("double");
57
+ expect(tag.attributes!["data-test"]).toBe('mix "quoted" content');
49
58
  });
50
59
 
51
- it('should handle unicode characters', () => {
52
- const tokens = tokenize('<div title="测试" data-emoji="🚀" class="lorem">');
60
+ it("should handle unicode characters", () => {
61
+ const tokens = tokenize(
62
+ '<div title="测试" data-emoji="🚀" class="lorem">',
63
+ );
53
64
  expect(tokens.length).toBeGreaterThan(0);
54
65
  const tag = tokens[0]!;
55
-
66
+
56
67
  expect(tag.attributes).toEqual({
57
- title: '测试',
58
- 'data-emoji': '🚀',
59
- class: 'lorem'
68
+ title: "测试",
69
+ "data-emoji": "🚀",
70
+ class: "lorem",
60
71
  });
61
72
  });
62
73
 
63
- it('should handle complex CDATA content as bogus comment', () => {
74
+ it("should handle complex CDATA content as bogus comment", () => {
64
75
  const complexContent = `
65
76
  function test() {
66
77
  return "<div>HTML inside JS</div>";
@@ -70,41 +81,43 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
70
81
  const tokens = tokenize(`<![CDATA[${complexContent}]]>`);
71
82
  expect(tokens.length).toBeGreaterThan(0);
72
83
  const cdataToken = tokens[0]!;
73
-
84
+
74
85
  expect(cdataToken.type).toBe(TokenType.COMMENT);
75
- expect(cdataToken.value).toBe('[CDATA[' + complexContent + ']]');
86
+ expect(cdataToken.value).toBe("[CDATA[" + complexContent + "]]");
76
87
  });
77
88
 
78
- it('should handle performance with large documents', () => {
79
- let html = '<div>';
89
+ it("should handle performance with large documents", () => {
90
+ let html = "<div>";
80
91
  for (let i = 0; i < 1000; i++) {
81
92
  html += `<p id="para-${i}">Content ${i}</p>`;
82
93
  }
83
- html += '</div>';
84
-
94
+ html += "</div>";
95
+
85
96
  const startTime = Date.now();
86
97
  const tokens = tokenize(html);
87
98
  const endTime = Date.now();
88
-
99
+
89
100
  expect(tokens.length).toBeGreaterThan(2000);
90
- expect(endTime - startTime).toBeLessThan(1000);
101
+ expect(endTime - startTime).toBeLessThan(1000);
91
102
  });
92
103
  });
93
104
 
94
- describe('Parser DOM-like Functionality', () => {
95
- it('should create proper parent-child relationships', () => {
96
- const ast = parseToAST('<div><section><article><h1>Title</h1><p>Content</p></article></section></div>');
97
-
105
+ describe("Parser DOM-like Functionality", () => {
106
+ it("should create proper parent-child relationships", () => {
107
+ const ast = parseToAST(
108
+ "<div><section><article><h1>Title</h1><p>Content</p></article></section></div>",
109
+ );
110
+
98
111
  const divElement = ast.children![0]!;
99
112
  const sectionElement = divElement.children![0]!;
100
113
  const articleElement = sectionElement.children![0]!;
101
-
114
+
102
115
  expect(articleElement.children).toHaveLength(2);
103
- expect(articleElement.children![0]!.tagName).toBe('h1');
104
- expect(articleElement.children![1]!.tagName).toBe('p');
116
+ expect(articleElement.children![0]!.tagName).toBe("h1");
117
+ expect(articleElement.children![1]!.tagName).toBe("p");
105
118
  });
106
119
 
107
- it('should handle complex navigation scenarios', () => {
120
+ it("should handle complex navigation scenarios", () => {
108
121
  const html = `
109
122
  <nav>
110
123
  <ul>
@@ -115,21 +128,27 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
115
128
  </nav>
116
129
  `;
117
130
  const ast = parseToAST(html);
118
-
119
- const navElement = ast.children!.find(child => child.tagName === 'nav')!;
120
- const ulElement = navElement.children!.find(child => child.tagName === 'ul')!;
121
- const liElements = ulElement.children!.filter(child => child.tagName === 'li');
122
-
131
+
132
+ const navElement = ast.children!.find(
133
+ (child) => child.tagName === "nav",
134
+ )!;
135
+ const ulElement = navElement.children!.find(
136
+ (child) => child.tagName === "ul",
137
+ )!;
138
+ const liElements = ulElement.children!.filter(
139
+ (child) => child.tagName === "li",
140
+ );
141
+
123
142
  expect(liElements).toHaveLength(3);
124
-
143
+
125
144
  liElements.forEach((li, index) => {
126
- const anchor = li.children!.find(child => child.tagName === 'a')!;
145
+ const anchor = li.children!.find((child) => child.tagName === "a")!;
127
146
  expect(anchor.attributes!.href).toBeDefined();
128
147
  expect(anchor.children![0]!.type).toBe(ASTNodeType.Text);
129
148
  });
130
149
  });
131
150
 
132
- it('should handle form elements with complex attributes', () => {
151
+ it("should handle form elements with complex attributes", () => {
133
152
  const html = `
134
153
  <form action="/submit" method="post">
135
154
  <input type="email" name="email" required pattern="[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,}$">
@@ -141,15 +160,19 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
141
160
  </form>
142
161
  `;
143
162
  const ast = parseToAST(html);
144
-
145
- const formElement = ast.children!.find(child => child.tagName === 'form')!;
146
- expect(formElement.attributes!.action).toBe('/submit');
147
- expect(formElement.attributes!.method).toBe('post');
148
-
163
+
164
+ const formElement = ast.children!.find(
165
+ (child) => child.tagName === "form",
166
+ )!;
167
+ expect(formElement.attributes!.action).toBe("/submit");
168
+ expect(formElement.attributes!.method).toBe("post");
169
+
149
170
  const formElements: ASTNode[] = [];
150
171
  const traverse = (node: ASTNode) => {
151
172
  if (node.type === ASTNodeType.Element) {
152
- if (['input', 'select', 'textarea', 'option'].includes(node.tagName!)) {
173
+ if (
174
+ ["input", "select", "textarea", "option"].includes(node.tagName!)
175
+ ) {
153
176
  formElements.push(node);
154
177
  }
155
178
  }
@@ -158,18 +181,20 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
158
181
  }
159
182
  };
160
183
  traverse(formElement);
161
-
184
+
162
185
  expect(formElements.length).toBeGreaterThan(3);
163
-
164
- const emailInput = formElements.find(el => el.attributes?.name === 'email');
165
- expect(emailInput!.attributes!.required).toBe('');
166
- expect(emailInput!.attributes!.pattern).toContain('@');
167
-
168
- const selectElement = formElements.find(el => el.tagName === 'select');
169
- expect(selectElement!.attributes!.multiple).toBe('');
186
+
187
+ const emailInput = formElements.find(
188
+ (el) => el.attributes?.name === "email",
189
+ );
190
+ expect(emailInput!.attributes!.required).toBe("");
191
+ expect(emailInput!.attributes!.pattern).toContain("@");
192
+
193
+ const selectElement = formElements.find((el) => el.tagName === "select");
194
+ expect(selectElement!.attributes!.multiple).toBe("");
170
195
  });
171
196
 
172
- it('should handle table structures', () => {
197
+ it("should handle table structures", () => {
173
198
  const html = `
174
199
  <table>
175
200
  <thead>
@@ -191,18 +216,24 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
191
216
  </table>
192
217
  `;
193
218
  const ast = parseToAST(html);
194
-
195
- const tableElement = ast.children!.find(child => child.tagName === 'table')!;
196
-
197
- const thead = tableElement.children!.find(child => child.tagName === 'thead');
198
- const tbody = tableElement.children!.find(child => child.tagName === 'tbody');
199
-
219
+
220
+ const tableElement = ast.children!.find(
221
+ (child) => child.tagName === "table",
222
+ )!;
223
+
224
+ const thead = tableElement.children!.find(
225
+ (child) => child.tagName === "thead",
226
+ );
227
+ const tbody = tableElement.children!.find(
228
+ (child) => child.tagName === "tbody",
229
+ );
230
+
200
231
  expect(thead).toBeDefined();
201
232
  expect(tbody).toBeDefined();
202
-
233
+
203
234
  const rows: ASTNode[] = [];
204
235
  const traverse = (node: ASTNode) => {
205
- if (node.tagName === 'tr') {
236
+ if (node.tagName === "tr") {
206
237
  rows.push(node);
207
238
  }
208
239
  if (node.children) {
@@ -210,23 +241,23 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
210
241
  }
211
242
  };
212
243
  traverse(tableElement);
213
-
214
- expect(rows).toHaveLength(3);
244
+
245
+ expect(rows).toHaveLength(3);
215
246
  });
216
247
 
217
- it('should handle mixed inline content', () => {
248
+ it("should handle mixed inline content", () => {
218
249
  const html = `
219
250
  <p>This is <strong>bold</strong> and <em>italic</em>.
220
251
  Here's a <a href="https://example.com">link</a> and
221
252
  <code>inline code</code>.</p>
222
253
  `;
223
254
  const ast = parseToAST(html);
224
-
225
- const pElement = ast.children!.find(child => child.tagName === 'p')!;
226
-
255
+
256
+ const pElement = ast.children!.find((child) => child.tagName === "p")!;
257
+
227
258
  let textNodes = 0;
228
259
  let elementNodes = 0;
229
-
260
+
230
261
  const traverse = (node: ASTNode) => {
231
262
  if (node.type === ASTNodeType.Text && (node as any).content?.trim()) {
232
263
  textNodes++;
@@ -237,16 +268,16 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
237
268
  node.children.forEach(traverse);
238
269
  }
239
270
  };
240
-
271
+
241
272
  if (pElement.children) {
242
273
  pElement.children.forEach(traverse);
243
274
  }
244
-
245
- expect(elementNodes).toBeGreaterThan(3);
275
+
276
+ expect(elementNodes).toBeGreaterThan(3);
246
277
  expect(textNodes).toBeGreaterThan(0);
247
278
  });
248
279
 
249
- it('should preserve complete document structure', () => {
280
+ it("should preserve complete document structure", () => {
250
281
  const html = `<!DOCTYPE html>
251
282
  <html lang="en">
252
283
  <head>
@@ -270,53 +301,71 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
270
301
  </footer>
271
302
  </body>
272
303
  </html>`;
273
-
304
+
274
305
  const ast = parseToAST(html);
275
-
276
- const doctype = ast.children!.find(child => child.type === ASTNodeType.Doctype);
306
+
307
+ const doctype = ast.children!.find(
308
+ (child) => child.type === ASTNodeType.Doctype,
309
+ );
277
310
  expect(doctype).toBeDefined();
278
-
279
- const htmlElement = ast.children!.find(child => child.tagName === 'html')!;
280
- expect(htmlElement.attributes!.lang).toBe('en');
281
-
282
- const headElement = htmlElement.children!.find(child => child.tagName === 'head');
283
- const bodyElement = htmlElement.children!.find(child => child.tagName === 'body');
284
-
311
+
312
+ const htmlElement = ast.children!.find(
313
+ (child) => child.tagName === "html",
314
+ )!;
315
+ expect(htmlElement.attributes!.lang).toBe("en");
316
+
317
+ const headElement = htmlElement.children!.find(
318
+ (child) => child.tagName === "head",
319
+ );
320
+ const bodyElement = htmlElement.children!.find(
321
+ (child) => child.tagName === "body",
322
+ );
323
+
285
324
  expect(headElement).toBeDefined();
286
325
  expect(bodyElement).toBeDefined();
287
-
288
- const headerElement = bodyElement!.children!.find(child => child.tagName === 'header');
289
- const mainElement = bodyElement!.children!.find(child => child.tagName === 'main');
290
- const footerElement = bodyElement!.children!.find(child => child.tagName === 'footer');
291
-
326
+
327
+ const headerElement = bodyElement!.children!.find(
328
+ (child) => child.tagName === "header",
329
+ );
330
+ const mainElement = bodyElement!.children!.find(
331
+ (child) => child.tagName === "main",
332
+ );
333
+ const footerElement = bodyElement!.children!.find(
334
+ (child) => child.tagName === "footer",
335
+ );
336
+
292
337
  expect(headerElement).toBeDefined();
293
338
  expect(mainElement).toBeDefined();
294
339
  expect(footerElement).toBeDefined();
295
-
296
- expect(headerElement!.attributes!.id).toBe('main-header');
340
+
341
+ expect(headerElement!.attributes!.id).toBe("main-header");
297
342
  });
298
343
  });
299
344
 
300
- describe('Real-world Content Handling', () => {
301
- it('should handle SVG content', () => {
345
+ describe("Real-world Content Handling", () => {
346
+ it("should handle SVG content", () => {
302
347
  const svg = `
303
348
  <svg width="100" height="100" xmlns="http://www.w3.org/2000/svg">
304
349
  <circle cx="50" cy="50" r="40" fill="red"/>
305
350
  <text x="50" y="50">SVG</text>
306
351
  </svg>
307
352
  `;
308
-
353
+
309
354
  const ast = parseToAST(svg);
310
-
311
- const svgElement = ast.children!.find(child => child.tagName === 'svg')!;
312
- expect(svgElement.attributes!.xmlns).toBe('http://www.w3.org/2000/svg');
313
-
314
- const circleElement = svgElement.children!.find(child => child.tagName === 'circle');
355
+
356
+ const svgElement = ast.children!.find(
357
+ (child) => child.tagName === "svg",
358
+ )!;
359
+ expect(svgElement.attributes!.xmlns).toBe("http://www.w3.org/2000/svg");
360
+
361
+ const circleElement = svgElement.children!.find(
362
+ (child) => child.tagName === "circle",
363
+ );
315
364
  expect(circleElement).toBeDefined();
316
- expect(circleElement!.attributes!.fill).toBe('red');
365
+ expect(circleElement!.attributes!.fill).toBe("red");
317
366
  });
318
367
 
319
- it('should handle script and style tags', () => {
368
+ it("should handle script and style tags", () => {
320
369
  const html = `
321
370
  <body>
322
371
  <script type="text/javascript">
@@ -329,9 +378,9 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
329
378
  </style>
330
379
  </body>
331
380
  `;
332
-
381
+
333
382
  const ast = parseToAST(html);
334
-
383
+
335
384
  function findByTagName(node: ASTNode, tagName: string): ASTNode | null {
336
385
  if (node.tagName === tagName) return node;
337
386
  if (node.children) {
@@ -342,159 +391,131 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
342
391
  }
343
392
  return null;
344
393
  }
345
-
346
- const scriptElement = findByTagName(ast, 'script');
347
- const styleElement = findByTagName(ast, 'style');
348
-
349
- expect(scriptElement!.attributes!.type).toBe('text/javascript');
350
- expect(styleElement!.attributes!.type).toBe('text/css');
394
+
395
+ const scriptElement = findByTagName(ast, "script");
396
+ const styleElement = findByTagName(ast, "style");
397
+
398
+ expect(scriptElement!.attributes!.type).toBe("text/javascript");
399
+ expect(styleElement!.attributes!.type).toBe("text/css");
351
400
  });
352
401
  });
353
402
 
354
- describe('Error Recovery and Edge Cases', () => {
355
- it('should handle extreme nesting depth', () => {
356
- let html = '';
403
+ describe("Error Recovery and Edge Cases", () => {
404
+ it("should handle extreme nesting depth", () => {
405
+ let html = "";
357
406
  const depth = 100;
358
-
407
+
359
408
  for (let i = 0; i < depth; i++) {
360
409
  html += `<div level="${i}">`;
361
410
  }
362
- html += 'Deep content';
411
+ html += "Deep content";
363
412
  for (let i = 0; i < depth; i++) {
364
- html += '</div>';
413
+ html += "</div>";
365
414
  }
366
-
415
+
367
416
  const ast = parseToAST(html);
368
-
417
+
369
418
  let current = ast.children![0]!;
370
419
  for (let i = 0; i < depth - 1; i++) {
371
- expect(current.tagName).toBe('div');
420
+ expect(current.tagName).toBe("div");
372
421
  expect(current.attributes!.level).toBe(i.toString());
373
- current = current.children!.find(child => child.type === ASTNodeType.Element)!;
422
+ current = current.children!.find(
423
+ (child) => child.type === ASTNodeType.Element,
424
+ )!;
374
425
  }
375
-
376
- const textNode = current.children!.find(child => child.type === ASTNodeType.Text)!;
377
- expect((textNode as any).content).toBe('Deep content');
426
+
427
+ const textNode = current.children!.find(
428
+ (child) => child.type === ASTNodeType.Text,
429
+ )!;
430
+ expect((textNode as any).content).toBe("Deep content");
378
431
  });
379
432
 
380
- it('should handle malformed HTML gracefully', () => {
381
- const malformedHTML = '<div><p><span>Text</div></span></p>';
433
+ it("should handle malformed HTML gracefully", () => {
434
+ const malformedHTML = "<div><p><span>Text</div></span></p>";
382
435
  const ast = parseToAST(malformedHTML);
383
-
436
+
384
437
  const divElement = ast.children![0]!;
385
- expect(divElement.tagName).toBe('div');
438
+ expect(divElement.tagName).toBe("div");
386
439
  expect(divElement.children!.length).toBeGreaterThan(0);
387
440
  });
388
441
 
389
- it('should handle orphaned closing tags', () => {
390
- const html = '</div><p>Valid content</p></span>';
442
+ it("should handle orphaned closing tags", () => {
443
+ const html = "</div><p>Valid content</p></span>";
391
444
  const ast = parseToAST(html);
392
-
445
+
393
446
  const pElement = ast.children!.find(
394
- child => child.type === ASTNodeType.Element && child.tagName === 'p'
447
+ (child) => child.type === ASTNodeType.Element && child.tagName === "p",
395
448
  )!;
396
449
  expect(pElement).toBeDefined();
397
- expect((pElement.children![0]! as any).content).toBe('Valid content');
398
- });
399
-
400
- it.skip('should handle mixed content types in single document', () => {
401
- const complexHTML = `
402
- <?xml version="1.0"?>
403
- <!DOCTYPE html>
404
- <!-- Document start -->
405
- <html>
406
- <head>
407
- <title>Test &amp; Demo</title>
408
- <![CDATA[Raw data here]]>
409
- </head>
410
- <body>
411
- <h1>Main Title</h1>
412
- <p>Paragraph with <strong>bold</strong> text.</p>
413
- <!-- Body content -->
414
- </body>
415
- </html>
416
- <!-- Document end -->
417
- `;
418
-
419
- const ast = parseToAST(complexHTML);
420
-
421
- const nodeCounts: Record<string, number> = {
422
- 'processing-instruction': 0,
423
- [ASTNodeType.Doctype]: 0,
424
- [ASTNodeType.Comment]: 0,
425
- [ASTNodeType.Element]: 0,
426
- [ASTNodeType.Text]: 0,
427
- [ASTNodeType.CDATA]: 0
428
- };
429
-
430
- const traverse = (node: ASTNode) => {
431
- if (node.type in nodeCounts) {
432
- nodeCounts[node.type]++;
433
- }
434
- if (node.children) {
435
- node.children.forEach(traverse);
436
- }
437
- };
438
-
439
- ast.children!.forEach(traverse);
440
-
441
- expect(nodeCounts['processing-instruction']).toBeGreaterThan(0);
442
- expect(nodeCounts[ASTNodeType.Doctype]).toBeGreaterThan(0);
443
- expect(nodeCounts[ASTNodeType.Comment]).toBeGreaterThan(0);
444
- expect(nodeCounts[ASTNodeType.Element]).toBeGreaterThan(0);
445
- expect(nodeCounts[ASTNodeType.Text]).toBeGreaterThan(0);
446
- expect(nodeCounts[ASTNodeType.CDATA]).toBeGreaterThan(0);
450
+ expect((pElement.children![0]! as any).content).toBe("Valid content");
447
451
  });
448
452
  });
449
453
 
450
- describe('Security and Template Edge Cases', () => {
451
- it('should treat javascript: urls as regular attribute values', () => {
454
+ describe("Security and Template Edge Cases", () => {
455
+ it("should treat javascript: urls as regular attribute values", () => {
452
456
  const html = `<a href="javascript:alert('XSS')">Click me</a>`;
453
457
  const ast = parseToAST(html);
454
- const aElement = ast.children!.find(child => child.tagName === 'a')!;
458
+ const aElement = ast.children!.find((child) => child.tagName === "a")!;
455
459
  expect(aElement).toBeDefined();
456
460
  expect(aElement.attributes!.href).toBe("javascript:alert('XSS')");
457
461
  });
458
462
 
459
- it('should correctly parse event handler attributes like onerror', () => {
463
+ it("should correctly parse event handler attributes like onerror", () => {
460
464
  const html = `<img src="invalid" onerror="alert('XSS')">`;
461
465
  const ast = parseToAST(html);
462
- const imgElement = ast.children!.find(child => child.tagName === 'img')!;
466
+ const imgElement = ast.children!.find(
467
+ (child) => child.tagName === "img",
468
+ )!;
463
469
  expect(imgElement).toBeDefined();
464
470
  expect(imgElement.attributes!.onerror).toBe("alert('XSS')");
465
471
  });
466
472
 
467
- it('should treat template engine syntax as plain text', () => {
473
+ it("should treat template engine syntax as plain text", () => {
468
474
  const html = `<div>{{ user.name }}</div><p>Hello, &lt;%= name %&gt;</p>`;
469
475
  const ast = parseToAST(html);
470
476
 
471
- const divElement = ast.children!.find(child => child.tagName === 'div')!;
477
+ const divElement = ast.children!.find(
478
+ (child) => child.tagName === "div",
479
+ )!;
472
480
  expect(divElement).toBeDefined();
473
- const divText = divElement.children!.find(child => child.type === ASTNodeType.Text)!;
474
- expect((divText as any).content).toBe('{{ user.name }}');
481
+ const divText = divElement.children!.find(
482
+ (child) => child.type === ASTNodeType.Text,
483
+ )!;
484
+ expect((divText as any).content).toBe("{{ user.name }}");
475
485
 
476
- const pElement = ast.children!.find(child => child.tagName === 'p')!;
486
+ const pElement = ast.children!.find((child) => child.tagName === "p")!;
477
487
  expect(pElement).toBeDefined();
478
- const pText = pElement.children!.find(child => child.type === ASTNodeType.Text)!;
479
- expect((pText as any).content).toBe('Hello, <%= name %>');
488
+ const pText = pElement.children!.find(
489
+ (child) => child.type === ASTNodeType.Text,
490
+ )!;
491
+ expect((pText as any).content).toBe("Hello, <%= name %>");
480
492
  });
481
493
 
482
- it('should handle null characters in content gracefully', () => {
483
- const html = '<div>Hello\0World</div>';
484
- const ast = parseToAST(html);
485
- const divElement = ast.children!.find(child => child.tagName === 'div')!;
486
- const textNode = divElement.children!.find(child => child.type === ASTNodeType.Text)!;
487
- expect((textNode as any).content).toBe('Hello\uFFFDWorld');
494
+ it("should handle null characters in content gracefully", () => {
495
+ const html = "<div>Hello\0World</div>";
496
+ const ast = parseToAST(html);
497
+ const divElement = ast.children!.find(
498
+ (child) => child.tagName === "div",
499
+ )!;
500
+ const textNode = divElement.children!.find(
501
+ (child) => child.type === ASTNodeType.Text,
502
+ )!;
503
+ expect((textNode as any).content).toBe("Hello\uFFFDWorld");
488
504
  });
489
505
 
490
- it('should handle control characters in content', () => {
491
- const html = '<div>Line1\x08\x09Line2\x0BLine3\x0CLine4\x0DLine5</div>';
492
- const ast = parseToAST(html);
493
- const divElement = ast.children!.find(child => child.tagName === 'div')!;
494
- const textNode = divElement.children!.find(child => child.type === ASTNodeType.Text)!;
495
- expect((textNode as any).content).toContain('\x09');
496
- expect((textNode as any).content).toContain('\x0D');
497
- expect((textNode as any).content).toContain('Line1');
498
- expect((textNode as any).content).toContain('Line5'); });
506
+ it("should handle control characters in content", () => {
507
+ const html = "<div>Line1\x08\x09Line2\x0BLine3\x0CLine4\x0DLine5</div>";
508
+ const ast = parseToAST(html);
509
+ const divElement = ast.children!.find(
510
+ (child) => child.tagName === "div",
511
+ )!;
512
+ const textNode = divElement.children!.find(
513
+ (child) => child.type === ASTNodeType.Text,
514
+ )!;
515
+ expect((textNode as any).content).toContain("\x09");
516
+ expect((textNode as any).content).toContain("\x0D");
517
+ expect((textNode as any).content).toContain("Line1");
518
+ expect((textNode as any).content).toContain("Line5");
519
+ });
499
520
  });
500
- });
521
+ });