@tkeron/html-parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,237 @@
1
+ import { describe, it, expect } from 'bun:test';
2
+ import { parseHTML } from '../../../index';
3
+
4
+ describe('Validator.nu Tests', () => {
5
// Feeds documents that a validator would accept, warn about, or reject to
// parseHTML and checks what can be read back from the returned document.
// Fixture markup is kept flush-left inside the template literals so the parsed
// input is exactly the text shown.
describe('HTML5 Validation Standards', () => {
  it('should validate proper document structure', () => {
    const validHTML = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Valid HTML5 Document</title>
</head>
<body>
<main>
<h1>Main Heading</h1>
<p>Paragraph content.</p>
</main>
</body>
</html>`;

    const document = parseHTML(validHTML);

    expect(document).toBeDefined();
    expect(document.documentElement?.tagName).toBe('HTML');
    expect(document.querySelector('title')?.textContent).toBe('Valid HTML5 Document');
  });

  it('should handle required attributes', () => {
    const requiredAttrsHTML = `
<img src="image.jpg" alt="Description">
<input type="text" id="name" name="name">
<label for="name">Name:</label>
<area shape="rect" coords="0,0,100,100" href="#" alt="Link">
`;

    const document = parseHTML(requiredAttrsHTML);

    expect(document).toBeDefined();
    expect(document.querySelector('img')?.getAttribute('alt')).toBe('Description');
    expect(document.querySelector('label')?.getAttribute('for')).toBe('name');
  });

  it('should handle content model violations', () => {
    // These should parse but may generate warnings in a full validator
    const contentModelHTML = `
<p>
<div>Block inside paragraph</div>
</p>
<a href="#">
<a href="#">Nested anchor</a>
</a>
`;

    const document = parseHTML(contentModelHTML);

    expect(document).toBeDefined();
    // Only the div's own text is asserted; where the div ends up relative to
    // the <p>, and how the nested anchors are arranged, is left unchecked.
    expect(document.querySelector('div')?.textContent).toBe('Block inside paragraph');
  });

  it('should handle obsolete elements', () => {
    const obsoleteHTML = `
<center>Centered text</center>
<font color="red">Red text</font>
<marquee>Scrolling text</marquee>
<blink>Blinking text</blink>
`;

    const document = parseHTML(obsoleteHTML);

    expect(document).toBeDefined();
    // Obsolete tags must still come back as queryable elements with their
    // text and attributes intact.
    expect(document.querySelector('center')?.textContent).toBe('Centered text');
    expect(document.querySelector('font')?.getAttribute('color')).toBe('red');
    expect(document.querySelector('marquee')?.textContent).toBe('Scrolling text');
    expect(document.querySelector('blink')?.textContent).toBe('Blinking text');
  });

  it('should handle deprecated attributes', () => {
    const deprecatedHTML = `
<table border="1" cellpadding="5" cellspacing="0">
<tr>
<td bgcolor="yellow" align="center">Cell</td>
</tr>
</table>
<body bgcolor="white" text="black">
<p align="justify">Text</p>
</body>
`;

    const document = parseHTML(deprecatedHTML);

    expect(document).toBeDefined();
    // The stray <body> tag is not asserted on: how a second body start tag is
    // treated is parser-specific.
    expect(document.querySelector('table')?.getAttribute('border')).toBe('1');
    const cell = document.querySelector('td');
    expect(cell?.getAttribute('bgcolor')).toBe('yellow');
    expect(cell?.getAttribute('align')).toBe('center');
  });
});
93
+
94
// Well-formed HTML5 fragments (forms, media, semantic sectioning, interactive
// widgets). Each test parses the fragment with parseHTML and reads back
// attributes or text that appear literally in the fixture.
describe('HTML5 Conformance Checking', () => {
  it('should handle valid HTML5 forms', () => {
    const formHTML = `
<form action="/submit" method="post">
<fieldset>
<legend>Personal Information</legend>
<label for="email">Email:</label>
<input type="email" id="email" name="email" required>

<label for="phone">Phone:</label>
<input type="tel" id="phone" name="phone">

<label for="date">Date:</label>
<input type="date" id="date" name="date">

<label for="range">Range:</label>
<input type="range" id="range" name="range" min="0" max="100">
</fieldset>
<button type="submit">Submit</button>
</form>
`;

    const document = parseHTML(formHTML);

    expect(document).toBeDefined();
    expect(document.querySelector('form')?.getAttribute('method')).toBe('post');
    expect(document.querySelector('legend')?.textContent).toBe('Personal Information');
    expect(document.querySelectorAll('input').length).toBe(4);
    // `required` is a valueless boolean attribute; only its presence matters.
    expect(document.querySelector('input[type="email"]')?.hasAttribute('required')).toBe(true);
    expect(document.querySelector('input[type="tel"]')?.hasAttribute('required')).toBe(false);
  });

  it('should handle valid HTML5 media elements', () => {
    const mediaHTML = `
<video controls width="640" height="480">
<source src="video.mp4" type="video/mp4">
<source src="video.webm" type="video/webm">
<track kind="subtitles" src="subs.vtt" srclang="en" label="English">
<p>Your browser doesn't support HTML5 video.</p>
</video>

<audio controls>
<source src="audio.mp3" type="audio/mpeg">
<source src="audio.ogg" type="audio/ogg">
<p>Your browser doesn't support HTML5 audio.</p>
</audio>
`;

    const document = parseHTML(mediaHTML);

    expect(document).toBeDefined();
    const video = document.querySelector('video');
    expect(video?.hasAttribute('controls')).toBe(true);
    expect(video?.getAttribute('width')).toBe('640');
    // Two <source> children under <video> plus two under <audio>.
    expect(document.querySelectorAll('source').length).toBe(4);
    expect(document.querySelector('track')?.getAttribute('kind')).toBe('subtitles');
    expect(document.querySelector('audio')?.hasAttribute('controls')).toBe(true);
  });

  it('should handle valid HTML5 semantic structure', () => {
    const semanticHTML = `
<article>
<header>
<h1>Article Title</h1>
<p>Published on <time datetime="2023-01-01">January 1, 2023</time></p>
</header>
<section>
<h2>Section Title</h2>
<p>Section content with <mark>highlighted text</mark>.</p>
</section>
<aside>
<p>Related information</p>
</aside>
<footer>
<p>Article footer</p>
</footer>
</article>
`;

    const document = parseHTML(semanticHTML);

    expect(document).toBeDefined();
    expect(document.querySelector('h1')?.textContent).toBe('Article Title');
    expect(document.querySelector('time')?.getAttribute('datetime')).toBe('2023-01-01');
    expect(document.querySelector('mark')?.textContent).toBe('highlighted text');
  });

  it('should handle valid HTML5 interactive elements', () => {
    const interactiveHTML = `
<details>
<summary>Click to expand</summary>
<p>Hidden content that can be revealed.</p>
</details>

<dialog id="modal">
<p>This is a modal dialog.</p>
<button onclick="document.getElementById('modal').close()">Close</button>
</dialog>

<progress value="70" max="100">70%</progress>
<meter value="0.8" min="0" max="1">80%</meter>
`;

    const document = parseHTML(interactiveHTML);

    expect(document).toBeDefined();
    expect(document.querySelector('summary')?.textContent).toBe('Click to expand');
    expect(document.querySelector('dialog')?.getAttribute('id')).toBe('modal');
    expect(document.querySelector('progress')?.getAttribute('value')).toBe('70');
    expect(document.querySelector('meter')?.getAttribute('value')).toBe('0.8');
  });
});
192
+
193
// Malformed markup: unclosed, mismatched, and illegally nested tags.
// The assertions are kept independent of the recovery strategy. They check that
// parseHTML returns a document, that the outermost element can be found, and
// that literal text from the input was not dropped. The exact tree shape is
// not asserted.
describe('HTML5 Error Recovery', () => {
  it('should handle missing closing tags', () => {
    const unclosedHTML = `
<div>
<p>Paragraph without closing tag
<span>Span without closing tag
</div>
`;

    const document = parseHTML(unclosedHTML);

    expect(document).toBeDefined();
    expect(document.querySelector('div')).toBeTruthy();
    // toContain rather than toBe: where the unclosed <p> ends depends on the
    // recovery rules, so trailing content may or may not be included.
    expect(document.querySelector('p')?.textContent).toContain('Paragraph without closing tag');
  });

  it('should handle mismatched tags', () => {
    const mismatchedHTML = `
<div>
<p>Paragraph
<span>Span</p>
</span>
</div>
`;

    const document = parseHTML(mismatchedHTML);

    expect(document).toBeDefined();
    expect(document.querySelector('div')).toBeTruthy();
    expect(document.querySelector('p')?.textContent).toContain('Paragraph');
  });

  it('should handle invalid nesting', () => {
    const invalidNestingHTML = `
<p>
<div>Block in paragraph</div>
<p>Paragraph in paragraph</p>
</p>
`;

    const document = parseHTML(invalidNestingHTML);

    expect(document).toBeDefined();
    // The div is properly closed in the input, so its own text is the same
    // whether the parser nests it in the <p> or hoists it out.
    expect(document.querySelector('div')?.textContent).toBe('Block in paragraph');
  });
});
237
+ });
@@ -0,0 +1,335 @@
1
+ import { describe, it, expect } from 'bun:test';
2
+ import { parseHTML } from '../../../index';
3
+
4
+ describe('Validator.nu HTML5 Compliance Tests', () => {
5
// Whole-document cases: with DOCTYPE, without DOCTYPE, and a page built from
// HTML5 sectioning elements.
describe('HTML5 Document Structure', () => {
  it('should handle valid HTML5 document', () => {
    const html = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Valid HTML5 Document</title>
</head>
<body>
<h1>Welcome</h1>
<p>This is a valid HTML5 document.</p>
</body>
</html>`;

    const document = parseHTML(html);

    expect(document).toBeDefined();
    expect(document.documentElement?.getAttribute('lang')).toBe('en');

    const meta = document.querySelector('meta[charset]');
    expect(meta?.getAttribute('charset')).toBe('UTF-8');

    const viewport = document.querySelector('meta[name="viewport"]');
    expect(viewport?.getAttribute('content')).toBe('width=device-width, initial-scale=1.0');
  });

  it('should handle missing DOCTYPE gracefully', () => {
    const html = `<html>
<head><title>No DOCTYPE</title></head>
<body><p>Content</p></body>
</html>`;

    const document = parseHTML(html);

    expect(document).toBeDefined();
    expect(document.querySelector('title')?.textContent).toBe('No DOCTYPE');
  });

  it('should handle HTML5 sectioning elements', () => {
    const html = `<!DOCTYPE html>
<html>
<body>
<header>
<h1>Site Title</h1>
<nav>
<ul>
<li><a href="#home">Home</a></li>
<li><a href="#about">About</a></li>
</ul>
</nav>
</header>
<main>
<article>
<h2>Article Title</h2>
<section>
<h3>Section Title</h3>
<p>Section content</p>
</section>
</article>
<aside>
<h3>Sidebar</h3>
<p>Sidebar content</p>
</aside>
</main>
<footer>
<p>&copy; 2025 Site Name</p>
</footer>
</body>
</html>`;

    const document = parseHTML(html);

    // toBeTruthy, not toBeDefined: under the DOM convention querySelector
    // reports a miss as null, and toBeDefined() accepts null, so it could never
    // fail here. NOTE(review): assumes this parser follows that convention;
    // toBeTruthy rejects null and undefined alike, so it is correct either way.
    expect(document.querySelector('header')).toBeTruthy();
    expect(document.querySelector('nav')).toBeTruthy();
    expect(document.querySelector('main')).toBeTruthy();
    expect(document.querySelector('article')).toBeTruthy();
    expect(document.querySelector('section')).toBeTruthy();
    expect(document.querySelector('aside')).toBeTruthy();
    expect(document.querySelector('footer')).toBeTruthy();
  });
});
87
+
88
// Form markup: boolean `required`, the HTML5 input types, and the constraint
// attributes (pattern, min/max, maxlength, minlength). Values are read back as
// the strings written in the fixture.
describe('HTML5 Form Validation', () => {
  it('should handle required form fields', () => {
    const html = `<form>
<label for="email">Email (required):</label>
<input type="email" id="email" name="email" required>

<label for="phone">Phone (optional):</label>
<input type="tel" id="phone" name="phone">

<button type="submit">Submit</button>
</form>`;

    const document = parseHTML(html);

    const emailInput = document.querySelector('input[type="email"]');
    expect(emailInput?.hasAttribute('required')).toBe(true);

    const phoneInput = document.querySelector('input[type="tel"]');
    expect(phoneInput?.hasAttribute('required')).toBe(false);
  });

  it('should handle HTML5 input types', () => {
    const html = `<form>
<input type="email" placeholder="Email">
<input type="url" placeholder="Website">
<input type="tel" placeholder="Phone">
<input type="date" placeholder="Date">
<input type="time" placeholder="Time">
<input type="number" min="0" max="100" step="1">
<input type="range" min="0" max="100" value="50">
<input type="color" value="#ff0000">
<input type="search" placeholder="Search">
</form>`;

    const document = parseHTML(html);

    // toBeTruthy, not toBeDefined: under the DOM convention a querySelector
    // miss is null, which toBeDefined() accepts, so these checks could never
    // fail. NOTE(review): assumes this parser follows that convention;
    // toBeTruthy rejects null and undefined alike.
    expect(document.querySelector('input[type="email"]')).toBeTruthy();
    expect(document.querySelector('input[type="url"]')).toBeTruthy();
    expect(document.querySelector('input[type="tel"]')).toBeTruthy();
    expect(document.querySelector('input[type="date"]')).toBeTruthy();
    expect(document.querySelector('input[type="time"]')).toBeTruthy();
    expect(document.querySelector('input[type="number"]')).toBeTruthy();
    expect(document.querySelector('input[type="range"]')).toBeTruthy();
    expect(document.querySelector('input[type="color"]')).toBeTruthy();
    expect(document.querySelector('input[type="search"]')).toBeTruthy();
  });

  it('should handle form validation attributes', () => {
    const html = `<form>
<input type="text" pattern="[A-Za-z]{3,}" title="At least 3 letters">
<input type="email" required>
<input type="number" min="1" max="10">
<input type="text" maxlength="50">
<input type="password" minlength="8">
<textarea rows="4" cols="50" placeholder="Comments"></textarea>
</form>`;

    const document = parseHTML(html);

    const patternInput = document.querySelector('input[pattern]');
    expect(patternInput?.getAttribute('pattern')).toBe('[A-Za-z]{3,}');

    const numberInput = document.querySelector('input[type="number"]');
    expect(numberInput?.getAttribute('min')).toBe('1');
    expect(numberInput?.getAttribute('max')).toBe('10');

    const maxlengthInput = document.querySelector('input[maxlength]');
    expect(maxlengthInput?.getAttribute('maxlength')).toBe('50');

    const minlengthInput = document.querySelector('input[minlength]');
    expect(minlengthInput?.getAttribute('minlength')).toBe('8');
  });
});
161
+
162
// <audio>, <video> and <picture> fragments with their <source>/<track>/<img>
// children, which are written without closing tags in the fixtures.
describe('HTML5 Media Elements', () => {
  it('should handle audio elements', () => {
    const html = `<audio controls>
<source src="audio.mp3" type="audio/mpeg">
<source src="audio.ogg" type="audio/ogg">
Your browser does not support the audio element.
</audio>`;

    const document = parseHTML(html);

    const audio = document.querySelector('audio');
    expect(audio?.hasAttribute('controls')).toBe(true);

    const sources = document.querySelectorAll('source');
    expect(sources.length).toBe(2);
    expect(sources[0]?.getAttribute('type')).toBe('audio/mpeg');
    expect(sources[1]?.getAttribute('type')).toBe('audio/ogg');
  });

  it('should handle video elements', () => {
    const html = `<video width="320" height="240" controls>
<source src="movie.mp4" type="video/mp4">
<source src="movie.ogg" type="video/ogg">
<track kind="captions" src="captions.vtt" srclang="en" label="English">
Your browser does not support the video tag.
</video>`;

    const document = parseHTML(html);

    const video = document.querySelector('video');
    expect(video?.getAttribute('width')).toBe('320');
    expect(video?.getAttribute('height')).toBe('240');
    expect(video?.hasAttribute('controls')).toBe(true);

    const track = document.querySelector('track');
    expect(track?.getAttribute('kind')).toBe('captions');
    expect(track?.getAttribute('srclang')).toBe('en');
  });

  it('should handle picture elements', () => {
    const html = `<picture>
<source media="(min-width: 650px)" srcset="img_pink_flowers.jpg">
<source media="(min-width: 465px)" srcset="img_white_flower.jpg">
<img src="img_orange_flowers.jpg" alt="Flowers" style="width:auto;">
</picture>`;

    const document = parseHTML(html);

    const picture = document.querySelector('picture');
    // toBeTruthy, not toBeDefined: a querySelector miss is null under the DOM
    // convention, and toBeDefined() accepts null. Failing here also stops the
    // optional chains below from masking a missing <picture>.
    // NOTE(review): assumes this parser returns null/undefined on a miss.
    expect(picture).toBeTruthy();

    const sources = picture?.querySelectorAll('source');
    expect(sources?.length).toBe(2);

    const img = picture?.querySelector('img');
    expect(img?.getAttribute('alt')).toBe('Flowers');
  });
});
220
+
221
// <details>/<summary>, <dialog>, <progress> and <meter>: checks that the
// elements are found and that their attributes and text match the fixture.
describe('HTML5 Interactive Elements', () => {
  it('should handle details and summary elements', () => {
    const html = `<details>
<summary>Click to expand</summary>
<p>Hidden content that can be toggled.</p>
<ul>
<li>Item 1</li>
<li>Item 2</li>
</ul>
</details>`;

    const document = parseHTML(html);

    const details = document.querySelector('details');
    // toBeTruthy, not toBeDefined: a querySelector miss is null under the DOM
    // convention, and toBeDefined() accepts null.
    // NOTE(review): assumes this parser returns null/undefined on a miss.
    expect(details).toBeTruthy();

    const summary = document.querySelector('summary');
    expect(summary?.textContent).toBe('Click to expand');

    const p = details?.querySelector('p');
    expect(p?.textContent).toBe('Hidden content that can be toggled.');
  });

  it('should handle dialog elements', () => {
    const html = `<dialog id="myDialog">
<form method="dialog">
<p>Are you sure you want to delete this item?</p>
<button value="cancel">Cancel</button>
<button value="confirm">Confirm</button>
</form>
</dialog>`;

    const document = parseHTML(html);

    const dialog = document.querySelector('dialog');
    expect(dialog?.getAttribute('id')).toBe('myDialog');

    const form = dialog?.querySelector('form');
    expect(form?.getAttribute('method')).toBe('dialog');

    const buttons = dialog?.querySelectorAll('button');
    expect(buttons?.length).toBe(2);
  });

  it('should handle progress and meter elements', () => {
    const html = `<div>
<label for="progress">Download progress:</label>
<progress id="progress" value="32" max="100">32%</progress>

<label for="meter">Disk usage:</label>
<meter id="meter" value="6" min="0" max="10">6 out of 10</meter>
</div>`;

    const document = parseHTML(html);

    const progress = document.querySelector('progress');
    expect(progress?.getAttribute('value')).toBe('32');
    expect(progress?.getAttribute('max')).toBe('100');

    const meter = document.querySelector('meter');
    expect(meter?.getAttribute('value')).toBe('6');
    expect(meter?.getAttribute('min')).toBe('0');
    expect(meter?.getAttribute('max')).toBe('10');
  });
});
286
+
287
// ARIA attributes and landmark roles: checks that `role`/`aria-*` attributes
// are readable with getAttribute and usable in attribute selectors.
describe('HTML5 Accessibility', () => {
  it('should handle ARIA attributes', () => {
    const html = `<div role="button" aria-label="Close dialog" aria-pressed="false" tabindex="0">
<span aria-hidden="true">×</span>
</div>`;

    const document = parseHTML(html);

    const button = document.querySelector('div[role="button"]');
    expect(button?.getAttribute('aria-label')).toBe('Close dialog');
    expect(button?.getAttribute('aria-pressed')).toBe('false');
    expect(button?.getAttribute('tabindex')).toBe('0');

    const span = button?.querySelector('span');
    expect(span?.getAttribute('aria-hidden')).toBe('true');
  });

  it('should handle landmark roles', () => {
    const html = `<div role="banner">
<h1>Site Title</h1>
</div>
<div role="navigation">
<ul>
<li><a href="#home">Home</a></li>
<li><a href="#about">About</a></li>
</ul>
</div>
<div role="main">
<h2>Main Content</h2>
<p>Content goes here.</p>
</div>
<div role="complementary">
<h3>Sidebar</h3>
<p>Additional information.</p>
</div>
<div role="contentinfo">
<p>&copy; 2025 Site Name</p>
</div>`;

    const document = parseHTML(html);

    // toBeTruthy, not toBeDefined: under the DOM convention a querySelector
    // miss is null, which toBeDefined() accepts, so these checks could never
    // fail. NOTE(review): assumes this parser follows that convention;
    // toBeTruthy rejects null and undefined alike.
    expect(document.querySelector('[role="banner"]')).toBeTruthy();
    expect(document.querySelector('[role="navigation"]')).toBeTruthy();
    expect(document.querySelector('[role="main"]')).toBeTruthy();
    expect(document.querySelector('[role="complementary"]')).toBeTruthy();
    expect(document.querySelector('[role="contentinfo"]')).toBeTruthy();
  });
});
335
+ });