@tkeron/html-parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,666 @@
1
+ import { expect, it, describe } from 'bun:test';
2
+ import {
3
+ tokenize,
4
+ TokenType,
5
+ type Token
6
+ } from '../src/tokenizer';
7
+
8
+ describe('HTML Tokenizer', () => {
9
+
10
describe('Basic Tags', () => {
  // Expects '<div>' to produce a single TAG_OPEN token followed by the EOF token.
  it('should tokenize simple opening tag', () => {
    const result = tokenize('<div>');
    const expectedOpenTag = {
      type: TokenType.TAG_OPEN,
      value: 'div',
      position: expect.any(Object),
      attributes: {},
      isSelfClosing: false
    };

    expect(result).toHaveLength(2);
    expect(result[0]!).toEqual(expectedOpenTag);
    expect(result[1]!.type).toBe(TokenType.EOF);
  });

  // Expects '</div>' to produce a TAG_CLOSE token carrying the isClosing flag.
  it('should tokenize simple closing tag', () => {
    const result = tokenize('</div>');
    const expectedCloseTag = {
      type: TokenType.TAG_CLOSE,
      value: 'div',
      position: expect.any(Object),
      isClosing: true
    };

    expect(result).toHaveLength(2);
    expect(result[0]!).toEqual(expectedCloseTag);
  });

  // Expects '<img/>' to be reported as TAG_OPEN with isSelfClosing set.
  it('should tokenize self-closing tag', () => {
    const result = tokenize('<img/>');
    const expectedSelfClosingTag = {
      type: TokenType.TAG_OPEN,
      value: 'img',
      position: expect.any(Object),
      attributes: {},
      isSelfClosing: true
    };

    expect(result).toHaveLength(2);
    expect(result[0]!).toEqual(expectedSelfClosingTag);
  });

  // Upper-case tag names are expected to come back lower-cased.
  it('should handle case insensitive tag names', () => {
    const [openTag, closeTag] = tokenize('<DIV></DIV>');

    expect(openTag!.value).toBe('div');
    expect(closeTag!.value).toBe('div');
  });
});
57
+
58
describe('Attributes', () => {
  // Tokenizes the markup and returns the attribute map of the first token (if any).
  const firstTokenAttributes = (markup: string) => tokenize(markup)[0]?.attributes;

  it('should parse attributes with double quotes', () => {
    const attributes = firstTokenAttributes('<div class="container" id="main">');

    expect(attributes).toEqual({
      class: 'container',
      id: 'main'
    });
  });

  it('should parse attributes with single quotes', () => {
    const attributes = firstTokenAttributes(`<div class='container' id='main'>`);

    expect(attributes).toEqual({
      class: 'container',
      id: 'main'
    });
  });

  it('should parse unquoted attributes', () => {
    const attributes = firstTokenAttributes('<div class=container id=main>');

    expect(attributes).toEqual({
      class: 'container',
      id: 'main'
    });
  });

  // Attributes without a value are expected to map to the empty string.
  it('should parse boolean attributes', () => {
    const attributes = firstTokenAttributes('<input disabled checked>');

    expect(attributes).toEqual({
      disabled: '',
      checked: ''
    });
  });

  it('should handle mixed attribute types', () => {
    const attributes = firstTokenAttributes('<input type="text" disabled value=test>');

    expect(attributes).toEqual({
      type: 'text',
      disabled: '',
      value: 'test'
    });
  });

  // Hyphenated attribute names must be kept verbatim as keys.
  it('should handle attributes with special characters', () => {
    const attributes = firstTokenAttributes('<div data-test="value" aria-label="test">');

    expect(attributes).toEqual({
      'data-test': 'value',
      'aria-label': 'test'
    });
  });
});
114
+
115
describe('Text Content', () => {
  // Plain text is expected to become one TEXT token followed by EOF.
  it('should tokenize plain text', () => {
    const result = tokenize('Hello World');
    const expectedText = {
      type: TokenType.TEXT,
      value: 'Hello World',
      position: expect.any(Object)
    };

    expect(result).toHaveLength(2);
    expect(result[0]).toEqual(expectedText);
  });

  // Leading and trailing whitespace must survive tokenization untouched.
  it('should handle text with whitespace', () => {
    const [textToken] = tokenize(' Hello World ');

    expect(textToken?.value).toBe(' Hello World ');
  });

  // Newlines inside text are expected to stay within a single token value.
  it('should handle multiline text', () => {
    const [textToken] = tokenize('Line 1\nLine 2\nLine 3');

    expect(textToken?.value).toBe('Line 1\nLine 2\nLine 3');
  });
});
139
+
140
describe('HTML Entities', () => {
  // Named entities are expected to be decoded in text (&nbsp; becomes U+00A0).
  it('should parse named entities', () => {
    const [textToken] = tokenize('&amp; &lt; &gt; &quot; &nbsp;');

    expect(textToken?.value).toBe('& < > " \u00A0');
  });

  // Decimal character references.
  it('should parse numeric entities', () => {
    const [textToken] = tokenize('&#65; &#66; &#67;');

    expect(textToken?.value).toBe('A B C');
  });

  // Hexadecimal character references.
  it('should parse hexadecimal entities', () => {
    const [textToken] = tokenize('&#x41; &#x42; &#x43;');

    expect(textToken?.value).toBe('A B C');
  });

  // Entities inside attribute values are expected to be decoded as well.
  it('should handle entities in attributes', () => {
    const [tagToken] = tokenize('<div title="&quot;Hello&quot;">');

    expect(tagToken?.attributes!.title).toBe('"Hello"');
  });

  // Unrecognized entity names are expected to pass through unchanged.
  it('should handle unknown entities', () => {
    const [textToken] = tokenize('&unknown;');

    expect(textToken?.value).toBe('&unknown;');
  });
});
171
+
172
describe('Comments', () => {
  // The comment value is expected to be the text between '<!--' and '-->'.
  it('should parse HTML comments', () => {
    const [commentToken] = tokenize('<!-- This is a comment -->');
    const expectedComment = {
      type: TokenType.COMMENT,
      value: ' This is a comment ',
      position: expect.any(Object)
    };

    expect(commentToken).toEqual(expectedComment);
  });

  it('should handle multiline comments', () => {
    const [commentToken] = tokenize(`<!-- \n Multi line\n comment\n -->`);

    expect(commentToken?.type).toBe(TokenType.COMMENT);
    expect(commentToken?.value).toContain('Multi line');
  });

  // '<!---->' is expected to yield a COMMENT token with an empty value.
  it('should handle empty comments', () => {
    const [commentToken] = tokenize('<!---->');
    const expectedComment = {
      type: TokenType.COMMENT,
      value: '',
      position: expect.any(Object)
    };

    expect(commentToken).toEqual(expectedComment);
  });
});
200
+
201
describe('CDATA Sections', () => {
  // The CDATA value is expected to be the payload without the '<![CDATA[' / ']]>' markers.
  it('should parse CDATA sections', () => {
    const [cdataToken] = tokenize('<![CDATA[Some data]]>');
    const expectedCdata = {
      type: TokenType.CDATA,
      value: 'Some data',
      position: expect.any(Object)
    };

    expect(cdataToken).toEqual(expectedCdata);
  });

  // Markup inside CDATA must be kept as raw text.
  it('should handle CDATA with special characters', () => {
    const [cdataToken] = tokenize('<![CDATA[<script>alert("test");</script>]]>');

    expect(cdataToken?.value).toBe('<script>alert("test");</script>');
  });
});
218
+
219
describe('DOCTYPE Declaration', () => {
  // The DOCTYPE token value is expected to be the document type name.
  it('should parse DOCTYPE declaration', () => {
    const [doctypeToken] = tokenize('<!DOCTYPE html>');
    const expectedDoctype = {
      type: TokenType.DOCTYPE,
      value: 'html',
      position: expect.any(Object)
    };

    expect(doctypeToken).toEqual(expectedDoctype);
  });

  // Public/system identifiers must not leak into the token value.
  it('should parse complex DOCTYPE', () => {
    const [doctypeToken] = tokenize('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">');

    expect(doctypeToken?.type).toBe(TokenType.DOCTYPE);
    expect(doctypeToken?.value).toBe('html');
  });
});
237
+
238
describe('Processing Instructions', () => {
  // The expected value keeps the leading '<?' but drops the closing '?>'.
  it('should parse XML processing instruction', () => {
    const [piToken] = tokenize('<?xml version="1.0" encoding="UTF-8"?>');
    const expectedInstruction = {
      type: TokenType.PROCESSING_INSTRUCTION,
      value: '<?xml version="1.0" encoding="UTF-8"',
      position: expect.any(Object)
    };

    expect(piToken).toEqual(expectedInstruction);
  });

  it('should parse PHP-style processing instruction', () => {
    const [piToken] = tokenize('<?php echo "Hello"; ?>');

    expect(piToken?.type).toBe(TokenType.PROCESSING_INSTRUCTION);
    expect(piToken?.value).toBe('<?php echo "Hello"; ');
  });
});
256
+
257
describe('Complex HTML Documents', () => {
  // A whole document must start with DOCTYPE, end with EOF and expose the
  // attributes of the <html> opening tag.
  it('should tokenize complete HTML document', () => {
    const html = [
      '<!DOCTYPE html>',
      '<html lang="en">',
      '<head>',
      '<title>Test</title>',
      '</head>',
      '<body>',
      '<h1>Hello World</h1>',
      '<p>This is a test.</p>',
      '</body>',
      '</html>'
    ].join('\n');

    const tokens = tokenize(html);

    expect(tokens.length).toBeGreaterThan(10);
    expect(tokens[0]?.type).toBe(TokenType.DOCTYPE);
    // `tokens` is always defined here, so read the last element directly
    // instead of optional-chaining on the array itself.
    expect(tokens.at(-1)?.type).toBe(TokenType.EOF);

    const htmlTag = tokens.find(t => t.type === TokenType.TAG_OPEN && t.value === 'html');
    expect(htmlTag).toBeDefined();
    expect(htmlTag!.attributes!.lang).toBe('en');
  });

  // Tags, text and comments interleaved in one fragment must all be reported.
  it('should handle mixed content', () => {
    const html = [
      '<div>',
      'Text before <!-- comment -->',
      '<span>nested</span>',
      'Text after &amp; entity',
      '</div>'
    ].join('\n');

    const tokens = tokenize(html);
    const hasTokenOfType = (type: TokenType) => tokens.some(t => t.type === type);

    expect(hasTokenOfType(TokenType.TAG_OPEN)).toBe(true);
    expect(hasTokenOfType(TokenType.TEXT)).toBe(true);
    expect(hasTokenOfType(TokenType.COMMENT)).toBe(true);
  });
});
295
+
296
describe('Edge Cases', () => {
  // Empty input is expected to yield only the EOF token.
  it('should handle empty input', () => {
    const result = tokenize('');

    expect(result).toHaveLength(1);
    expect(result[0]?.type).toBe(TokenType.EOF);
  });

  // Whitespace-only input is expected to be preserved as a TEXT token.
  it('should handle whitespace only', () => {
    const result = tokenize(' \n\t ');
    const [textToken] = result;

    expect(result).toHaveLength(2);
    expect(textToken?.type).toBe(TokenType.TEXT);
    expect(textToken?.value).toBe(' \n\t ');
  });

  // An unterminated attribute quote must still produce the opening tag.
  it('should handle malformed tags', () => {
    const [tagToken] = tokenize('<div class="test>');

    expect(tagToken?.type).toBe(TokenType.TAG_OPEN);
    expect(tagToken?.value).toBe('div');
  });

  // A comment without '-->' is expected to run to the end of the input.
  it('should handle unclosed comments', () => {
    const [commentToken] = tokenize('<!-- unclosed comment');

    expect(commentToken?.type).toBe(TokenType.COMMENT);
    expect(commentToken?.value).toBe(' unclosed comment');
  });
});
326
+
327
describe('Advanced Edge Cases', () => {
  // Attributes written back to back, without separating whitespace.
  it('should handle attributes with no spaces', () => {
    const tokens = tokenize('<div class="test"id="main"data-value="123">');
    expect(tokens.length).toBeGreaterThan(0);
    const tag = tokens[0]!;

    expect(tag.attributes).toEqual({
      class: 'test',
      id: 'main',
      'data-value': '123'
    });
  });

  // Whitespace around '=' and before '>' must be ignored.
  it('should handle attributes with excessive spaces', () => {
    const tokens = tokenize('<div class = "test" id = "main" >');
    expect(tokens.length).toBeGreaterThan(0);
    const tag = tokens[0]!;

    expect(tag.attributes).toEqual({
      class: 'test',
      id: 'main'
    });
  });

  it('should handle mixed quote styles in same tag', () => {
    const tokens = tokenize(`<div class='single' id="double" data-test='mix "quoted" content'>`);
    expect(tokens.length).toBeGreaterThan(0);
    const tag = tokens[0]!;

    expect(tag.attributes!.class).toBe('single');
    expect(tag.attributes!.id).toBe('double');
    expect(tag.attributes!['data-test']).toBe('mix "quoted" content');
  });

  // Only the tag itself is pinned down; the attribute map just has to exist.
  it('should handle malformed quotes gracefully', () => {
    const tokens = tokenize('<div class="unclosed id="test">');
    expect(tokens.length).toBeGreaterThan(0);
    const tag = tokens[0]!;

    expect(tag.type).toBe(TokenType.TAG_OPEN);
    expect(tag.value).toBe('div');
    expect(tag.attributes).toBeDefined();
  });

  // Smoke test: the tokenizer must return at least one token for '<>' / '</>'.
  it('should handle empty tag names', () => {
    const tokens = tokenize('<>content</>');

    expect(tokens.length).toBeGreaterThan(0);
  });

  it('should handle tags with numbers and special characters', () => {
    const tokens = tokenize('<h1 class="heading-1" data-level="1">');
    expect(tokens.length).toBeGreaterThan(0);
    const tag = tokens[0]!;

    expect(tag.value).toBe('h1');
    expect(tag.attributes).toEqual({
      class: 'heading-1',
      'data-level': '1'
    });
  });

  it('should handle extremely long attribute values', () => {
    const longValue = 'a'.repeat(10000);
    const tokens = tokenize(`<div data-long="${longValue}">`);
    expect(tokens.length).toBeGreaterThan(0);
    const tag = tokens[0]!;

    expect(tag.attributes!['data-long']).toBe(longValue);
  });

  it('should handle unicode characters in attributes', () => {
    const tokens = tokenize('<div title="测试" data-emoji="🚀" class="café">');
    expect(tokens.length).toBeGreaterThan(0);
    const tag = tokens[0]!;

    expect(tag.attributes).toEqual({
      title: '测试',
      'data-emoji': '🚀',
      class: 'café'
    });
  });

  // The other quote character inside a quoted value must be kept literally.
  it('should handle nested quotes in attributes', () => {
    const tokens = tokenize(`<div onclick="alert('Hello')" title='She said "hi"'>`);
    expect(tokens.length).toBeGreaterThan(0);
    const tag = tokens[0]!;

    expect(tag.attributes!.onclick).toBe(`alert('Hello')`);
    expect(tag.attributes!.title).toBe('She said "hi"');
  });

  it('should handle attributes without values', () => {
    const tokens = tokenize('<input type="checkbox" checked disabled readonly>');
    expect(tokens.length).toBeGreaterThan(0);
    const tag = tokens[0]!;

    expect(tag.attributes).toEqual({
      type: 'checkbox',
      checked: '',
      disabled: '',
      readonly: ''
    });
  });

  // The CDATA payload must round-trip unchanged even when it contains markup.
  it('should handle CDATA with complex content', () => {
    const complexContent = [
      '',
      'function it() {',
      'return "<div>HTML inside JS</div>";',
      '}',
      '/* Comment with </script> */',
      'var x = "String with <tags>";',
      ''
    ].join('\n');
    const tokens = tokenize(`<![CDATA[${complexContent}]]>`);
    const cdataToken = tokens[0]!;

    expect(cdataToken.type).toBe(TokenType.CDATA);
    expect(cdataToken.value).toBe(complexContent);
  });

  // Table-driven: each input must yield a PROCESSING_INSTRUCTION token whose
  // lower-cased value contains the expected target name.
  it('should handle processing instructions with various formats', () => {
    const cases = [
      { input: '<?xml version="1.0" encoding="UTF-8"?>', expected: 'xml' },
      { input: '<?xml-stylesheet type="text/xsl" href="style.xsl"?>', expected: 'xml' },
      { input: '<?php echo "Hello World"; ?>', expected: 'php' },
      { input: '<?python print("Hello") ?>', expected: 'python' }
    ];

    for (const { input, expected } of cases) {
      const piToken = tokenize(input)[0]!;

      expect(piToken.type).toBe(TokenType.PROCESSING_INSTRUCTION);
      expect(piToken.value.toLowerCase()).toContain(expected);
    }
  });

  // Table-driven: every sample must be recognised as a COMMENT token.
  it('should handle comments with special content', () => {
    const specialComments = [
      '<!-- TODO: Fix this -->',
      '<!-- <script>alert("xss")</script> -->',
      '<!-- Multi\nline\ncomment -->',
      '<!-- Comment with -- inside -->',
      '<!--[if IE]><![endif]-->'
    ];

    for (const comment of specialComments) {
      const commentToken = tokenize(comment)[0]!;

      expect(commentToken.type).toBe(TokenType.COMMENT);
    }
  });

  // A document containing every construct must produce at least one token of
  // each type and exactly one EOF.
  it('should handle mixed content with all token types', () => {
    const html = [
      '',
      '<?xml version="1.0"?>',
      '<!DOCTYPE html>',
      '<!-- Main document -->',
      '<html lang="en">',
      '<head>',
      '<title>Test &amp; Demo</title>',
      '<![CDATA[Some raw data]]>',
      '</head>',
      '<body>',
      '<h1>Hello World</h1>',
      '<p>Text with <strong>bold</strong> content.</p>',
      '<!-- End of body -->',
      '</body>',
      '</html>',
      '<!-- End of document -->',
      ''
    ].join('\n');

    const tokens = tokenize(html);

    const tokenCounts = {
      [TokenType.PROCESSING_INSTRUCTION]: 0,
      [TokenType.DOCTYPE]: 0,
      [TokenType.COMMENT]: 0,
      [TokenType.TAG_OPEN]: 0,
      [TokenType.TAG_CLOSE]: 0,
      [TokenType.TEXT]: 0,
      [TokenType.CDATA]: 0,
      [TokenType.EOF]: 0
    };

    for (const token of tokens) {
      tokenCounts[token.type]++;
    }

    expect(tokenCounts[TokenType.PROCESSING_INSTRUCTION]).toBeGreaterThan(0);
    expect(tokenCounts[TokenType.DOCTYPE]).toBeGreaterThan(0);
    expect(tokenCounts[TokenType.COMMENT]).toBeGreaterThan(0);
    expect(tokenCounts[TokenType.TAG_OPEN]).toBeGreaterThan(0);
    expect(tokenCounts[TokenType.TAG_CLOSE]).toBeGreaterThan(0);
    expect(tokenCounts[TokenType.TEXT]).toBeGreaterThan(0);
    expect(tokenCounts[TokenType.CDATA]).toBeGreaterThan(0);
    expect(tokenCounts[TokenType.EOF]).toBe(1);
  });
});
527
+
528
describe('Performance and Stress Tests', () => {
  // 1000 paragraphs inside one <div>; tokenizing must finish within a second.
  it('should handle very large documents', () => {
    const paragraphs = Array.from(
      { length: 1000 },
      (_, i) => `<p id="para-${i}" class="paragraph">Paragraph ${i} content</p>`
    );
    const html = `<div>${paragraphs.join('')}</div>`;

    // performance.now() is monotonic, so the measurement cannot be skewed by
    // wall-clock adjustments the way Date.now() can.
    const startedAt = performance.now();
    const tokens = tokenize(html);
    const elapsedMs = performance.now() - startedAt;

    expect(tokens.length).toBeGreaterThan(2000);
    expect(elapsedMs).toBeLessThan(1000);
  });

  // `depth` opening tags + one text token + `depth` closing tags + EOF.
  it('should handle deeply nested structures', () => {
    const depth = 100;
    const openingTags = Array.from({ length: depth }, (_, i) => `<div level="${i}">`).join('');
    const closingTags = '</div>'.repeat(depth);
    const html = `${openingTags}Content${closingTags}`;

    const tokens = tokenize(html);

    expect(tokens.length).toBe(depth * 2 + 2);
  });

  // A single element carrying 100 distinct attributes.
  it('should handle many attributes per element', () => {
    const attributeList = Array.from({ length: 100 }, (_, i) => ` attr-${i}="value-${i}"`).join('');
    const html = `<div${attributeList}>`;

    const tokens = tokenize(html);
    const divTag = tokens[0]!;

    expect(Object.keys(divTag.attributes!).length).toBe(100);
    expect(divTag.attributes!['attr-50']).toBe('value-50');
  });
});
576
+
577
describe('Real-world Scenarios', () => {
  // Looks up the first opening tag with the given name. Restricting the search
  // to TAG_OPEN keeps a closing tag or text token with the same value from
  // being picked up by accident.
  const findOpenTag = (tokens: ReturnType<typeof tokenize>, tagName: string) =>
    tokens.find(token => token.type === TokenType.TAG_OPEN && token.value === tagName);

  it('should handle SVG elements', () => {
    const svg = [
      '',
      '<svg width="100" height="100" xmlns="http://www.w3.org/2000/svg">',
      '<circle cx="50" cy="50" r="40" stroke="black" stroke-width="3" fill="red"/>',
      '<text x="50" y="50" text-anchor="middle">SVG</text>',
      '</svg>',
      ''
    ].join('\n');

    const tokens = tokenize(svg);

    // Assert presence first so a missing tag fails as an expectation rather
    // than as a TypeError on property access.
    const svgTag = findOpenTag(tokens, 'svg');
    expect(svgTag).toBeDefined();
    expect(svgTag!.attributes!.xmlns).toBe('http://www.w3.org/2000/svg');

    const circleTag = findOpenTag(tokens, 'circle');
    expect(circleTag).toBeDefined();
    expect(circleTag!.isSelfClosing).toBe(true);
    expect(circleTag!.attributes!.fill).toBe('red');
  });

  it('should handle script and style tags', () => {
    const html = [
      '',
      '<script type="text/javascript">',
      'function hello() {',
      'alert("Hello <world>");',
      '}',
      '</script>',
      '<style type="text/css">',
      '.class { color: red; }',
      '/* Comment with <tags> */',
      '</style>',
      ''
    ].join('\n');

    const tokens = tokenize(html);

    const scriptTag = findOpenTag(tokens, 'script');
    const styleTag = findOpenTag(tokens, 'style');
    expect(scriptTag).toBeDefined();
    expect(styleTag).toBeDefined();

    expect(scriptTag!.attributes!.type).toBe('text/javascript');
    expect(styleTag!.attributes!.type).toBe('text/css');
  });

  it('should handle form elements with complex attributes', () => {
    const html = [
      '',
      '<form method="POST" action="/submit" enctype="multipart/form-data">',
      '<input type="email" name="email" required pattern="[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,}$" title="Please enter a valid email">',
      '<select name="country" size="1" multiple>',
      '<option value="us" selected>United States</option>',
      '<option value="ca">Canada</option>',
      '</select>',
      '</form>',
      ''
    ].join('\n');

    const tokens = tokenize(html);

    const inputTag = findOpenTag(tokens, 'input');
    expect(inputTag).toBeDefined();
    expect(inputTag!.attributes!.pattern).toContain('@');
    expect(inputTag!.attributes!.required).toBe('');

    const selectTag = findOpenTag(tokens, 'select');
    expect(selectTag).toBeDefined();
    expect(selectTag!.attributes!.multiple).toBe('');
  });
});
639
+
640
describe('Error Recovery', () => {
  // Even with an unterminated attribute quote the token stream must end in EOF.
  it('should handle incomplete tags gracefully', () => {
    const malformedHTML = '<div class="test><p>Content</p>';
    const tokens = tokenize(malformedHTML);

    expect(tokens.length).toBeGreaterThan(0);
    expect(tokens[tokens.length - 1]!.type).toBe(TokenType.EOF);
  });

  // Some token named 'div' must still be produced despite the mismatched quotes.
  it('should handle unmatched quotes in attributes', () => {
    const html = '<div class="test id=\'main">Content</div>';
    const tokens = tokenize(html);

    const divTag = tokens.find(token => token.value === 'div');
    expect(divTag).toBeDefined();
  });

  // The outer <div> is never closed; both elements after it must still be
  // tokenized. Each one is checked separately, because an "either one" check
  // would pass even if tokenizing stopped after the first element.
  it('should continue parsing after errors', () => {
    const html = '<div><p>Valid paragraph</p><span>Valid span</span>';
    const tokens = tokenize(html);

    const hasParagraph = tokens.some(token => token.value === 'p');
    const hasSpan = tokens.some(token => token.value === 'span');

    expect(hasParagraph).toBe(true);
    expect(hasSpan).toBe(true);
  });
});
666
+ });
package/tsconfig.json ADDED
@@ -0,0 +1,25 @@
1
+ {
2
+ "compilerOptions": {
3
+ "lib": ["ESNext", "DOM"],
4
+ "target": "ESNext",
5
+ "module": "Preserve",
6
+ "moduleDetection": "force",
7
+ "jsx": "react-jsx",
8
+ "allowJs": true,
9
+
10
+ "moduleResolution": "bundler",
11
+ "allowImportingTsExtensions": true,
12
+ "verbatimModuleSyntax": true,
13
+ "noEmit": true,
14
+
15
+ "strict": true,
16
+ "skipLibCheck": true,
17
+ "noFallthroughCasesInSwitch": true,
18
+ "noUncheckedIndexedAccess": true,
19
+ "noImplicitOverride": true,
20
+
21
+ "noUnusedLocals": false,
22
+ "noUnusedParameters": false,
23
+ "noPropertyAccessFromIndexSignature": false
24
+ }
25
+ }