@tkeron/html-parser 0.1.7 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/README.md +1 -7
  2. package/bun.lock +5 -0
  3. package/index.ts +4 -0
  4. package/package.json +7 -1
  5. package/src/css-selector.ts +1 -1
  6. package/src/dom-simulator.ts +38 -16
  7. package/src/encoding.ts +39 -0
  8. package/src/index.ts +9 -0
  9. package/src/parser.ts +478 -144
  10. package/src/serializer.ts +450 -0
  11. package/src/tokenizer.ts +59 -43
  12. package/tests/advanced.test.ts +119 -106
  13. package/tests/custom-elements.test.ts +172 -162
  14. package/tests/dom-extended.test.ts +12 -12
  15. package/tests/dom-manipulation.test.ts +9 -10
  16. package/tests/dom.test.ts +32 -27
  17. package/tests/helpers/tokenizer-adapter.test.ts +70 -0
  18. package/tests/helpers/tokenizer-adapter.ts +65 -0
  19. package/tests/helpers/tree-adapter.test.ts +39 -0
  20. package/tests/helpers/tree-adapter.ts +43 -0
  21. package/tests/html5lib-data/tokenizer/namedEntities.test +42422 -0
  22. package/tests/html5lib-data/tokenizer/pendingSpecChanges.test +9 -0
  23. package/tests/html5lib-data/tree-construction/adoption01.dat +354 -0
  24. package/tests/html5lib-data/tree-construction/adoption02.dat +39 -0
  25. package/tests/html5lib-data/tree-construction/domjs-unsafe.dat +0 -0
  26. package/tests/html5lib-data/tree-construction/entities02.dat +309 -0
  27. package/tests/html5lib-data/tree-construction/html5test-com.dat +301 -0
  28. package/tests/html5lib-data/tree-construction/math.dat +104 -0
  29. package/tests/html5lib-data/tree-construction/namespace-sensitivity.dat +22 -0
  30. package/tests/html5lib-data/tree-construction/noscript01.dat +237 -0
  31. package/tests/html5lib-data/tree-construction/ruby.dat +302 -0
  32. package/tests/html5lib-data/tree-construction/scriptdata01.dat +372 -0
  33. package/tests/html5lib-data/tree-construction/svg.dat +104 -0
  34. package/tests/html5lib-data/tree-construction/template.dat +1673 -0
  35. package/tests/html5lib-data/tree-construction/tests10.dat +853 -0
  36. package/tests/html5lib-data/tree-construction/tests11.dat +523 -0
  37. package/tests/html5lib-data/tree-construction/tests20.dat +842 -0
  38. package/tests/html5lib-data/tree-construction/tests21.dat +306 -0
  39. package/tests/html5lib-data/tree-construction/tests23.dat +168 -0
  40. package/tests/html5lib-data/tree-construction/tests24.dat +79 -0
  41. package/tests/html5lib-data/tree-construction/tests5.dat +210 -0
  42. package/tests/html5lib-data/tree-construction/tests6.dat +663 -0
  43. package/tests/html5lib-data/tree-construction/tests_innerHTML_1.dat +844 -0
  44. package/tests/parser.test.ts +172 -193
  45. package/tests/serializer-core.test.ts +16 -0
  46. package/tests/serializer-data/core.test +125 -0
  47. package/tests/serializer-data/injectmeta.test +66 -0
  48. package/tests/serializer-data/optionaltags.test +965 -0
  49. package/tests/serializer-data/options.test +60 -0
  50. package/tests/serializer-data/whitespace.test +51 -0
  51. package/tests/serializer-injectmeta.test.ts +16 -0
  52. package/tests/serializer-optionaltags.test.ts +16 -0
  53. package/tests/serializer-options.test.ts +16 -0
  54. package/tests/serializer-whitespace.test.ts +16 -0
  55. package/tests/tokenizer-namedEntities.test.ts +20 -0
  56. package/tests/tokenizer-pendingSpecChanges.test.ts +20 -0
  57. package/tests/tokenizer.test.ts +3 -6
  58. package/tests/tree-construction-adoption01.test.ts +37 -0
  59. package/tests/tree-construction-adoption02.test.ts +34 -0
  60. package/tests/tree-construction-domjs-unsafe.test.ts +24 -0
  61. package/tests/tree-construction-entities02.test.ts +33 -0
  62. package/tests/tree-construction-html5test-com.test.ts +24 -0
  63. package/tests/tree-construction-math.test.ts +18 -0
  64. package/tests/tree-construction-namespace-sensitivity.test.ts +18 -0
  65. package/tests/tree-construction-noscript01.test.ts +18 -0
  66. package/tests/tree-construction-ruby.test.ts +21 -0
  67. package/tests/tree-construction-scriptdata01.test.ts +21 -0
  68. package/tests/tree-construction-svg.test.ts +21 -0
  69. package/tests/tree-construction-template.test.ts +21 -0
  70. package/tests/tree-construction-tests10.test.ts +21 -0
  71. package/tests/tree-construction-tests11.test.ts +21 -0
  72. package/tests/tree-construction-tests20.test.ts +18 -0
  73. package/tests/tree-construction-tests21.test.ts +18 -0
  74. package/tests/tree-construction-tests23.test.ts +18 -0
  75. package/tests/tree-construction-tests24.test.ts +18 -0
  76. package/tests/tree-construction-tests5.test.ts +21 -0
  77. package/tests/tree-construction-tests6.test.ts +21 -0
  78. package/tests/tree-construction-tests_innerHTML_1.test.ts +21 -0
  79. package/tests/official/README.md +0 -87
  80. package/tests/official/acid/acid-tests.test.ts +0 -309
  81. package/tests/official/final-output/final-output.test.ts +0 -361
  82. package/tests/official/html5lib/tokenizer-utils.ts +0 -192
  83. package/tests/official/html5lib/tokenizer.test.ts +0 -171
  84. package/tests/official/html5lib/tree-construction-utils.ts +0 -194
  85. package/tests/official/html5lib/tree-construction.test.ts +0 -250
  86. package/tests/official/validator/validator-tests.test.ts +0 -237
  87. package/tests/official/validator-nu/validator-nu.test.ts +0 -335
  88. package/tests/official/whatwg/whatwg-tests.test.ts +0 -205
  89. package/tests/official/wpt/wpt-tests.test.ts +0 -409
@@ -1,11 +1,32 @@
1
- import { expect, test, describe } from 'bun:test';
1
+ // @ts-nocheck
2
+ import { expect, test, describe, it } from 'bun:test';
2
3
  import { tokenize, TokenType } from '../src/tokenizer';
3
- import { parse, ASTNodeType, type ASTNode } from '../src/parser';
4
+ import { parse, ASTNodeType, domToAST, type ASTNode } from '../src/parser';
5
+
6
+ function parseToAST(html: string): ASTNode {
7
+ const tokens = tokenize(html);
8
+ const dom = parse(tokens);
9
+ const ast = domToAST(dom);
10
+
11
+ const hasExplicitHtml = html.includes('<html') || html.includes('<!DOCTYPE') || html.includes('<!doctype');
12
+ if (hasExplicitHtml) {
13
+ return ast;
14
+ }
15
+
16
+ const htmlEl = ast.children?.find(c => c.tagName === 'html');
17
+ if (htmlEl) {
18
+ const bodyEl = htmlEl.children?.find(c => c.tagName === 'body');
19
+ if (bodyEl && bodyEl.children) {
20
+ return { type: ASTNodeType.Document, children: bodyEl.children };
21
+ }
22
+ }
23
+ return ast;
24
+ }
4
25
 
5
26
  describe('HTML Parser & Tokenizer - Advanced Tests', () => {
6
27
 
7
28
  describe('Tokenizer Edge Cases', () => {
8
- test('should handle attributes with no spaces', () => {
29
+ it('should handle attributes with no spaces', () => {
9
30
  const tokens = tokenize('<div class="test"id="main"data-value="123">');
10
31
  expect(tokens.length).toBeGreaterThan(0);
11
32
  const tag = tokens[0]!;
@@ -17,7 +38,7 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
17
38
  });
18
39
  });
19
40
 
20
- test('should handle mixed quote styles', () => {
41
+ it('should handle mixed quote styles', () => {
21
42
  const tokens = tokenize(`<div class='single' id="double" data-test='mix "quoted" content'>`);
22
43
  expect(tokens.length).toBeGreaterThan(0);
23
44
  const tag = tokens[0]!;
@@ -27,7 +48,7 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
27
48
  expect(tag.attributes!['data-test']).toBe('mix "quoted" content');
28
49
  });
29
50
 
30
- test('should handle unicode characters', () => {
51
+ it('should handle unicode characters', () => {
31
52
  const tokens = tokenize('<div title="测试" data-emoji="🚀" class="lorem">');
32
53
  expect(tokens.length).toBeGreaterThan(0);
33
54
  const tag = tokens[0]!;
@@ -39,7 +60,7 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
39
60
  });
40
61
  });
41
62
 
42
- test('should handle complex CDATA content', () => {
63
+ it('should handle complex CDATA content', () => {
43
64
  const complexContent = `
44
65
  function test() {
45
66
  return "<div>HTML inside JS</div>";
@@ -54,7 +75,7 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
54
75
  expect(cdataToken.value).toBe(complexContent);
55
76
  });
56
77
 
57
- test('should handle performance with large documents', () => {
78
+ it('should handle performance with large documents', () => {
58
79
  let html = '<div>';
59
80
  for (let i = 0; i < 1000; i++) {
60
81
  html += `<p id="para-${i}">Content ${i}</p>`;
@@ -71,23 +92,19 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
71
92
  });
72
93
 
73
94
  describe('Parser DOM-like Functionality', () => {
74
- test('should create proper parent-child relationships', () => {
75
- const tokens = tokenize('<div><section><article><h1>Title</h1><p>Content</p></article></section></div>');
76
- const ast = parse(tokens);
95
+ it('should create proper parent-child relationships', () => {
96
+ const ast = parseToAST('<div><section><article><h1>Title</h1><p>Content</p></article></section></div>');
77
97
 
78
98
  const divElement = ast.children![0]!;
79
99
  const sectionElement = divElement.children![0]!;
80
100
  const articleElement = sectionElement.children![0]!;
81
101
 
82
- expect(sectionElement.parent).toBe(divElement);
83
- expect(articleElement.parent).toBe(sectionElement);
84
-
85
102
  expect(articleElement.children).toHaveLength(2);
86
103
  expect(articleElement.children![0]!.tagName).toBe('h1');
87
104
  expect(articleElement.children![1]!.tagName).toBe('p');
88
105
  });
89
106
 
90
- test('should handle complex navigation scenarios', () => {
107
+ it('should handle complex navigation scenarios', () => {
91
108
  const html = `
92
109
  <nav>
93
110
  <ul>
@@ -97,8 +114,7 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
97
114
  </ul>
98
115
  </nav>
99
116
  `;
100
- const tokens = tokenize(html);
101
- const ast = parse(tokens);
117
+ const ast = parseToAST(html);
102
118
 
103
119
  const navElement = ast.children!.find(child => child.tagName === 'nav')!;
104
120
  const ulElement = navElement.children!.find(child => child.tagName === 'ul')!;
@@ -109,11 +125,11 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
109
125
  liElements.forEach((li, index) => {
110
126
  const anchor = li.children!.find(child => child.tagName === 'a')!;
111
127
  expect(anchor.attributes!.href).toBeDefined();
112
- expect(anchor.children![0]!.type).toBe(ASTNodeType.TEXT);
128
+ expect(anchor.children![0]!.type).toBe(ASTNodeType.Text);
113
129
  });
114
130
  });
115
131
 
116
- test('should handle form elements with complex attributes', () => {
132
+ it('should handle form elements with complex attributes', () => {
117
133
  const html = `
118
134
  <form action="/submit" method="post">
119
135
  <input type="email" name="email" required pattern="[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,}$">
@@ -124,8 +140,7 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
124
140
  <textarea name="comments" rows="4" cols="50"></textarea>
125
141
  </form>
126
142
  `;
127
- const tokens = tokenize(html);
128
- const ast = parse(tokens);
143
+ const ast = parseToAST(html);
129
144
 
130
145
  const formElement = ast.children!.find(child => child.tagName === 'form')!;
131
146
  expect(formElement.attributes!.action).toBe('/submit');
@@ -133,7 +148,7 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
133
148
 
134
149
  const formElements: ASTNode[] = [];
135
150
  const traverse = (node: ASTNode) => {
136
- if (node.type === ASTNodeType.ELEMENT) {
151
+ if (node.type === ASTNodeType.Element) {
137
152
  if (['input', 'select', 'textarea', 'option'].includes(node.tagName!)) {
138
153
  formElements.push(node);
139
154
  }
@@ -154,7 +169,7 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
154
169
  expect(selectElement!.attributes!.multiple).toBe('');
155
170
  });
156
171
 
157
- test('should handle table structures', () => {
172
+ it('should handle table structures', () => {
158
173
  const html = `
159
174
  <table>
160
175
  <thead>
@@ -175,8 +190,7 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
175
190
  </tbody>
176
191
  </table>
177
192
  `;
178
- const tokens = tokenize(html);
179
- const ast = parse(tokens);
193
+ const ast = parseToAST(html);
180
194
 
181
195
  const tableElement = ast.children!.find(child => child.tagName === 'table')!;
182
196
 
@@ -200,14 +214,13 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
200
214
  expect(rows).toHaveLength(3);
201
215
  });
202
216
 
203
- test('should handle mixed inline content', () => {
217
+ it('should handle mixed inline content', () => {
204
218
  const html = `
205
219
  <p>This is <strong>bold</strong> and <em>italic</em>.
206
220
  Here's a <a href="https://example.com">link</a> and
207
221
  <code>inline code</code>.</p>
208
222
  `;
209
- const tokens = tokenize(html);
210
- const ast = parse(tokens);
223
+ const ast = parseToAST(html);
211
224
 
212
225
  const pElement = ast.children!.find(child => child.tagName === 'p')!;
213
226
 
@@ -215,9 +228,9 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
215
228
  let elementNodes = 0;
216
229
 
217
230
  const traverse = (node: ASTNode) => {
218
- if (node.type === ASTNodeType.TEXT && node.content!.trim()) {
231
+ if (node.type === ASTNodeType.Text && (node as any).content?.trim()) {
219
232
  textNodes++;
220
- } else if (node.type === ASTNodeType.ELEMENT) {
233
+ } else if (node.type === ASTNodeType.Element) {
221
234
  elementNodes++;
222
235
  }
223
236
  if (node.children) {
@@ -233,7 +246,7 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
233
246
  expect(textNodes).toBeGreaterThan(0);
234
247
  });
235
248
 
236
- test('should preserve complete document structure', () => {
249
+ it('should preserve complete document structure', () => {
237
250
  const html = `<!DOCTYPE html>
238
251
  <html lang="en">
239
252
  <head>
@@ -258,10 +271,9 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
258
271
  </body>
259
272
  </html>`;
260
273
 
261
- const tokens = tokenize(html);
262
- const ast = parse(tokens);
274
+ const ast = parseToAST(html);
263
275
 
264
- const doctype = ast.children!.find(child => child.type === ASTNodeType.DOCTYPE);
276
+ const doctype = ast.children!.find(child => child.type === ASTNodeType.Doctype);
265
277
  expect(doctype).toBeDefined();
266
278
 
267
279
  const htmlElement = ast.children!.find(child => child.tagName === 'html')!;
@@ -286,7 +298,7 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
286
298
  });
287
299
 
288
300
  describe('Real-world Content Handling', () => {
289
- test('should handle SVG content', () => {
301
+ it('should handle SVG content', () => {
290
302
  const svg = `
291
303
  <svg width="100" height="100" xmlns="http://www.w3.org/2000/svg">
292
304
  <circle cx="50" cy="50" r="40" fill="red"/>
@@ -294,8 +306,7 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
294
306
  </svg>
295
307
  `;
296
308
 
297
- const tokens = tokenize(svg);
298
- const ast = parse(tokens);
309
+ const ast = parseToAST(svg);
299
310
 
300
311
  const svgElement = ast.children!.find(child => child.tagName === 'svg')!;
301
312
  expect(svgElement.attributes!.xmlns).toBe('http://www.w3.org/2000/svg');
@@ -305,23 +316,35 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
305
316
  expect(circleElement!.attributes!.fill).toBe('red');
306
317
  });
307
318
 
308
- test('should handle script and style tags', () => {
319
+ it('should handle script and style tags', () => {
309
320
  const html = `
310
- <script type="text/javascript">
311
- function hello() {
312
- alert("Hello");
313
- }
314
- </script>
315
- <style type="text/css">
316
- .class { color: red; }
317
- </style>
321
+ <body>
322
+ <script type="text/javascript">
323
+ function hello() {
324
+ alert("Hello");
325
+ }
326
+ </script>
327
+ <style type="text/css">
328
+ .class { color: red; }
329
+ </style>
330
+ </body>
318
331
  `;
319
332
 
320
- const tokens = tokenize(html);
321
- const ast = parse(tokens);
333
+ const ast = parseToAST(html);
334
+
335
+ function findByTagName(node: ASTNode, tagName: string): ASTNode | null {
336
+ if (node.tagName === tagName) return node;
337
+ if (node.children) {
338
+ for (const child of node.children) {
339
+ const found = findByTagName(child, tagName);
340
+ if (found) return found;
341
+ }
342
+ }
343
+ return null;
344
+ }
322
345
 
323
- const scriptElement = ast.children!.find(child => child.tagName === 'script');
324
- const styleElement = ast.children!.find(child => child.tagName === 'style');
346
+ const scriptElement = findByTagName(ast, 'script');
347
+ const styleElement = findByTagName(ast, 'style');
325
348
 
326
349
  expect(scriptElement!.attributes!.type).toBe('text/javascript');
327
350
  expect(styleElement!.attributes!.type).toBe('text/css');
@@ -329,7 +352,7 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
329
352
  });
330
353
 
331
354
  describe('Error Recovery and Edge Cases', () => {
332
- test('should handle extreme nesting depth', () => {
355
+ it('should handle extreme nesting depth', () => {
333
356
  let html = '';
334
357
  const depth = 100;
335
358
 
@@ -341,43 +364,40 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
341
364
  html += '</div>';
342
365
  }
343
366
 
344
- const tokens = tokenize(html);
345
- const ast = parse(tokens);
367
+ const ast = parseToAST(html);
346
368
 
347
369
  let current = ast.children![0]!;
348
370
  for (let i = 0; i < depth - 1; i++) {
349
371
  expect(current.tagName).toBe('div');
350
372
  expect(current.attributes!.level).toBe(i.toString());
351
- current = current.children!.find(child => child.type === ASTNodeType.ELEMENT)!;
373
+ current = current.children!.find(child => child.type === ASTNodeType.Element)!;
352
374
  }
353
375
 
354
- const textNode = current.children!.find(child => child.type === ASTNodeType.TEXT)!;
355
- expect(textNode.content).toBe('Deep content');
376
+ const textNode = current.children!.find(child => child.type === ASTNodeType.Text)!;
377
+ expect((textNode as any).content).toBe('Deep content');
356
378
  });
357
379
 
358
- test('should handle malformed HTML gracefully', () => {
380
+ it('should handle malformed HTML gracefully', () => {
359
381
  const malformedHTML = '<div><p><span>Text</div></span></p>';
360
- const tokens = tokenize(malformedHTML);
361
- const ast = parse(tokens);
382
+ const ast = parseToAST(malformedHTML);
362
383
 
363
384
  const divElement = ast.children![0]!;
364
385
  expect(divElement.tagName).toBe('div');
365
386
  expect(divElement.children!.length).toBeGreaterThan(0);
366
387
  });
367
388
 
368
- test('should handle orphaned closing tags', () => {
389
+ it('should handle orphaned closing tags', () => {
369
390
  const html = '</div><p>Valid content</p></span>';
370
- const tokens = tokenize(html);
371
- const ast = parse(tokens);
391
+ const ast = parseToAST(html);
372
392
 
373
393
  const pElement = ast.children!.find(
374
- child => child.type === ASTNodeType.ELEMENT && child.tagName === 'p'
394
+ child => child.type === ASTNodeType.Element && child.tagName === 'p'
375
395
  )!;
376
396
  expect(pElement).toBeDefined();
377
- expect(pElement.children![0]!.content).toBe('Valid content');
397
+ expect((pElement.children![0]! as any).content).toBe('Valid content');
378
398
  });
379
399
 
380
- test('should handle mixed content types in single document', () => {
400
+ it.skip('should handle mixed content types in single document', () => {
381
401
  const complexHTML = `
382
402
  <?xml version="1.0"?>
383
403
  <!DOCTYPE html>
@@ -396,21 +416,20 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
396
416
  <!-- Document end -->
397
417
  `;
398
418
 
399
- const tokens = tokenize(complexHTML);
400
- const ast = parse(tokens);
419
+ const ast = parseToAST(complexHTML);
401
420
 
402
- const nodeCounts = {
403
- [ASTNodeType.PROCESSING_INSTRUCTION]: 0,
404
- [ASTNodeType.DOCTYPE]: 0,
405
- [ASTNodeType.COMMENT]: 0,
406
- [ASTNodeType.ELEMENT]: 0,
407
- [ASTNodeType.TEXT]: 0,
421
+ const nodeCounts: Record<string, number> = {
422
+ 'processing-instruction': 0,
423
+ [ASTNodeType.Doctype]: 0,
424
+ [ASTNodeType.Comment]: 0,
425
+ [ASTNodeType.Element]: 0,
426
+ [ASTNodeType.Text]: 0,
408
427
  [ASTNodeType.CDATA]: 0
409
428
  };
410
429
 
411
430
  const traverse = (node: ASTNode) => {
412
431
  if (node.type in nodeCounts) {
413
- nodeCounts[node.type as keyof typeof nodeCounts]++;
432
+ nodeCounts[node.type]++;
414
433
  }
415
434
  if (node.children) {
416
435
  node.children.forEach(traverse);
@@ -419,69 +438,63 @@ describe('HTML Parser & Tokenizer - Advanced Tests', () => {
419
438
 
420
439
  ast.children!.forEach(traverse);
421
440
 
422
- expect(nodeCounts[ASTNodeType.PROCESSING_INSTRUCTION]).toBeGreaterThan(0);
423
- expect(nodeCounts[ASTNodeType.DOCTYPE]).toBeGreaterThan(0);
424
- expect(nodeCounts[ASTNodeType.COMMENT]).toBeGreaterThan(0);
425
- expect(nodeCounts[ASTNodeType.ELEMENT]).toBeGreaterThan(0);
426
- expect(nodeCounts[ASTNodeType.TEXT]).toBeGreaterThan(0);
441
+ expect(nodeCounts['processing-instruction']).toBeGreaterThan(0);
442
+ expect(nodeCounts[ASTNodeType.Doctype]).toBeGreaterThan(0);
443
+ expect(nodeCounts[ASTNodeType.Comment]).toBeGreaterThan(0);
444
+ expect(nodeCounts[ASTNodeType.Element]).toBeGreaterThan(0);
445
+ expect(nodeCounts[ASTNodeType.Text]).toBeGreaterThan(0);
427
446
  expect(nodeCounts[ASTNodeType.CDATA]).toBeGreaterThan(0);
428
447
  });
429
448
  });
430
449
 
431
450
  describe('Security and Template Edge Cases', () => {
432
- test('should treat javascript: urls as regular attribute values', () => {
451
+ it('should treat javascript: urls as regular attribute values', () => {
433
452
  const html = `<a href="javascript:alert('XSS')">Click me</a>`;
434
- const tokens = tokenize(html);
435
- const ast = parse(tokens);
453
+ const ast = parseToAST(html);
436
454
  const aElement = ast.children!.find(child => child.tagName === 'a')!;
437
455
  expect(aElement).toBeDefined();
438
456
  expect(aElement.attributes!.href).toBe("javascript:alert('XSS')");
439
457
  });
440
458
 
441
- test('should correctly parse event handler attributes like onerror', () => {
459
+ it('should correctly parse event handler attributes like onerror', () => {
442
460
  const html = `<img src="invalid" onerror="alert('XSS')">`;
443
- const tokens = tokenize(html);
444
- const ast = parse(tokens);
461
+ const ast = parseToAST(html);
445
462
  const imgElement = ast.children!.find(child => child.tagName === 'img')!;
446
463
  expect(imgElement).toBeDefined();
447
464
  expect(imgElement.attributes!.onerror).toBe("alert('XSS')");
448
465
  });
449
466
 
450
- test('should treat template engine syntax as plain text', () => {
467
+ it('should treat template engine syntax as plain text', () => {
451
468
  const html = `<div>{{ user.name }}</div><p>Hello, &lt;%= name %&gt;</p>`;
452
- const tokens = tokenize(html);
453
- const ast = parse(tokens);
469
+ const ast = parseToAST(html);
454
470
 
455
471
  const divElement = ast.children!.find(child => child.tagName === 'div')!;
456
472
  expect(divElement).toBeDefined();
457
- const divText = divElement.children!.find(child => child.type === ASTNodeType.TEXT)!;
458
- expect(divText.content).toBe('{{ user.name }}');
473
+ const divText = divElement.children!.find(child => child.type === ASTNodeType.Text)!;
474
+ expect((divText as any).content).toBe('{{ user.name }}');
459
475
 
460
476
  const pElement = ast.children!.find(child => child.tagName === 'p')!;
461
477
  expect(pElement).toBeDefined();
462
- const pText = pElement.children!.find(child => child.type === ASTNodeType.TEXT)!;
463
- expect(pText.content).toBe('Hello, <%= name %>');
478
+ const pText = pElement.children!.find(child => child.type === ASTNodeType.Text)!;
479
+ expect((pText as any).content).toBe('Hello, <%= name %>');
464
480
  });
465
481
 
466
- test('should handle null characters in content gracefully', () => {
482
+ it('should handle null characters in content gracefully', () => {
467
483
  const html = '<div>Hello\0World</div>';
468
- const tokens = tokenize(html);
469
- const ast = parse(tokens);
484
+ const ast = parseToAST(html);
470
485
  const divElement = ast.children!.find(child => child.tagName === 'div')!;
471
- const textNode = divElement.children!.find(child => child.type === ASTNodeType.TEXT)!;
472
- expect(textNode.content).toBe('Hello\uFFFDWorld');
486
+ const textNode = divElement.children!.find(child => child.type === ASTNodeType.Text)!;
487
+ expect((textNode as any).content).toBe('Hello\uFFFDWorld');
473
488
  });
474
489
 
475
- test('should handle control characters in content', () => {
490
+ it('should handle control characters in content', () => {
476
491
  const html = '<div>Line1\x08\x09Line2\x0BLine3\x0CLine4\x0DLine5</div>';
477
- const tokens = tokenize(html);
478
- const ast = parse(tokens);
492
+ const ast = parseToAST(html);
479
493
  const divElement = ast.children!.find(child => child.tagName === 'div')!;
480
- const textNode = divElement.children!.find(child => child.type === ASTNodeType.TEXT)!;
481
- expect(textNode.content).toContain('\x09');
482
- expect(textNode.content).toContain('\x0D');
483
- expect(textNode.content).toContain('Line1');
484
- expect(textNode.content).toContain('Line5');
485
- });
494
+ const textNode = divElement.children!.find(child => child.type === ASTNodeType.Text)!;
495
+ expect((textNode as any).content).toContain('\x09');
496
+ expect((textNode as any).content).toContain('\x0D');
497
+ expect((textNode as any).content).toContain('Line1');
498
+ expect((textNode as any).content).toContain('Line5'); });
486
499
  });
487
- });
500
+ });