@tkeron/html-parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/npm_deploy.yml +24 -0
- package/LICENSE +21 -0
- package/README.md +120 -0
- package/bun.lock +29 -0
- package/index.ts +18 -0
- package/package.json +25 -0
- package/src/css-selector.ts +172 -0
- package/src/dom-simulator.ts +592 -0
- package/src/dom-types.ts +78 -0
- package/src/parser.ts +355 -0
- package/src/tokenizer.ts +413 -0
- package/tests/advanced.test.ts +487 -0
- package/tests/api-integration.test.ts +114 -0
- package/tests/dom-extended.test.ts +173 -0
- package/tests/dom.test.ts +482 -0
- package/tests/google-dom.test.ts +118 -0
- package/tests/google-homepage.txt +13 -0
- package/tests/official/README.md +87 -0
- package/tests/official/acid/acid-tests.test.ts +309 -0
- package/tests/official/final-output/final-output.test.ts +361 -0
- package/tests/official/html5lib/tokenizer-utils.ts +204 -0
- package/tests/official/html5lib/tokenizer.test.ts +184 -0
- package/tests/official/html5lib/tree-construction-utils.ts +208 -0
- package/tests/official/html5lib/tree-construction.test.ts +250 -0
- package/tests/official/validator/validator-tests.test.ts +237 -0
- package/tests/official/validator-nu/validator-nu.test.ts +335 -0
- package/tests/official/whatwg/whatwg-tests.test.ts +205 -0
- package/tests/official/wpt/wpt-tests.test.ts +409 -0
- package/tests/parser.test.ts +642 -0
- package/tests/selectors.test.ts +65 -0
- package/tests/test-page-0.txt +362 -0
- package/tests/tokenizer.test.ts +666 -0
- package/tsconfig.json +25 -0
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
import { describe, it, expect } from 'bun:test';
|
|
2
|
+
import { tokenize } from '../../../src/tokenizer';
|
|
3
|
+
import { parse } from '../../../src/parser';
|
|
4
|
+
|
|
5
|
+
/**
 * Smoke tests for modern HTML constructs (custom elements, microdata,
 * declarative shadow DOM, module scripts, interactive elements, ARIA).
 * Every case pushes markup through tokenize() and then parse() and checks
 * that a result comes back; only the polyglot case also checks for children.
 */
describe('WHATWG HTML Parser Tests', () => {
  /** Runs the two-stage pipeline: tokenize the markup, then parse the tokens. */
  const toAst = (markup: string) => parse(tokenize(markup));

  /**
   * Builds a multi-line fixture: rows are joined with "\n" and the result is
   * wrapped in one leading and one trailing newline.
   */
  const fixture = (...rows: string[]): string => ['', ...rows, ''].join('\n');

  describe('HTML Living Standard Compliance', () => {
    it('should handle HTML5 polyglot documents', () => {
      // No leading/trailing newline here: the document starts at the DOCTYPE.
      const documentMarkup = [
        '<!DOCTYPE html>',
        '<html xmlns="http://www.w3.org/1999/xhtml">',
        '<head>',
        '<title>Polyglot Document</title>',
        '</head>',
        '<body>',
        '<p>This is a polyglot document.</p>',
        '</body>',
        '</html>',
      ].join('\n');

      const tree = toAst(documentMarkup);

      expect(tree).toBeDefined();
      expect((tree as any).children?.length).toBeGreaterThan(0);
    });

    it('should handle HTML5 custom elements', () => {
      const samples = [
        '<my-element>Content</my-element>',
        '<custom-button onclick="handleClick()">Click me</custom-button>',
        '<web-component data-value="test">Component</web-component>',
      ];

      for (const sample of samples) {
        expect(toAst(sample)).toBeDefined();
      }
    });

    it('should handle HTML5 microdata', () => {
      const markup = fixture(
        '<div itemscope itemtype="http://schema.org/Person">',
        '<span itemprop="name">John Doe</span>',
        '<span itemprop="email">john@example.com</span>',
        '</div>',
      );

      expect(toAst(markup)).toBeDefined();
    });

    it('should handle HTML5 shadow DOM elements', () => {
      const markup = fixture(
        '<div>',
        '<template shadowrootmode="open">',
        '<style>:host { display: block; }</style>',
        '<slot></slot>',
        '</template>',
        '<p>Shadow content</p>',
        '</div>',
      );

      expect(toAst(markup)).toBeDefined();
    });

    it('should handle HTML5 web components', () => {
      const markup = fixture(
        '<custom-element>',
        '<template>',
        '<style>',
        ':host { display: block; }',
        '</style>',
        '<div class="content">',
        '<slot name="title"></slot>',
        '<slot></slot>',
        '</div>',
        '</template>',
        '</custom-element>',
      );

      expect(toAst(markup)).toBeDefined();
    });
  });

  describe('HTML5 Module Scripts', () => {
    it('should handle ES6 module scripts', () => {
      const markup = fixture(
        '<script type="module">',
        "import { component } from './component.js';",
        'component.init();',
        '</script>',
      );

      expect(toAst(markup)).toBeDefined();
    });

    it('should handle importmap', () => {
      const markup = fixture(
        '<script type="importmap">',
        '{',
        '"imports": {',
        '"lodash": "https://cdn.skypack.dev/lodash"',
        '}',
        '}',
        '</script>',
      );

      expect(toAst(markup)).toBeDefined();
    });
  });

  describe('HTML5 Progressive Enhancement', () => {
    it('should handle picture elements', () => {
      const markup = fixture(
        '<picture>',
        '<source media="(min-width: 800px)" srcset="large.jpg">',
        '<source media="(min-width: 400px)" srcset="medium.jpg">',
        '<img src="small.jpg" alt="Description">',
        '</picture>',
      );

      expect(toAst(markup)).toBeDefined();
    });

    it('should handle details/summary elements', () => {
      const markup = fixture(
        '<details>',
        '<summary>Show/Hide Content</summary>',
        '<p>This content can be toggled.</p>',
        '</details>',
      );

      expect(toAst(markup)).toBeDefined();
    });

    it('should handle dialog elements', () => {
      const markup = fixture(
        '<dialog open>',
        '<form method="dialog">',
        '<p>This is a dialog box.</p>',
        '<button>Close</button>',
        '</form>',
        '</dialog>',
      );

      expect(toAst(markup)).toBeDefined();
    });
  });

  describe('HTML5 Accessibility Features', () => {
    it('should handle ARIA attributes', () => {
      const markup = fixture(
        '<div role="button" aria-label="Close dialog" aria-pressed="false" tabindex="0">',
        '<span aria-hidden="true">×</span>',
        '</div>',
      );

      expect(toAst(markup)).toBeDefined();
    });

    it('should handle landmark roles', () => {
      const markup = fixture(
        '<div role="main">',
        '<div role="navigation">',
        '<ul role="menubar">',
        '<li role="menuitem">Home</li>',
        '<li role="menuitem">About</li>',
        '</ul>',
        '</div>',
        '</div>',
      );

      expect(toAst(markup)).toBeDefined();
    });
  });
});
|
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
import { describe, it, expect } from 'bun:test';
|
|
2
|
+
import { tokenize } from '../../../src/tokenizer';
|
|
3
|
+
import { parse } from '../../../src/parser';
|
|
4
|
+
|
|
5
|
+
/**
 * WPT-inspired smoke tests for the tokenizer/parser pipeline.
 *
 * Each case feeds markup to tokenize(), hands the tokens to parse(), and
 * checks that a result is returned. Only the DOCTYPE case additionally checks
 * that the result has children.
 *
 * Multi-line fixtures are written flush-left on purpose: everything between
 * the backticks is part of the string handed to the tokenizer.
 */
describe('Web Platform Tests (WPT) Compliance', () => {
  describe('HTML5 Parsing Semantics', () => {
    it('should handle DOCTYPE variations', () => {
      // Case variants plus legacy PUBLIC / SYSTEM identifier forms.
      const doctypes = [
        '<!DOCTYPE html>',
        '<!DOCTYPE HTML>',
        '<!doctype html>',
        '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">',
        '<!DOCTYPE html SYSTEM "about:legacy-compat">'
      ];

      doctypes.forEach(doctype => {
        const html = `${doctype}<html><body>Test</body></html>`;
        const tokens = tokenize(html);
        const ast = parse(tokens);

        expect(ast).toBeDefined();
        expect((ast as any).children?.length).toBeGreaterThan(0);
      });
    });

    it('should handle HTML5 semantic elements', () => {
      const html5Elements = [
        '<article>Content</article>',
        '<section>Content</section>',
        '<nav>Navigation</nav>',
        '<header>Header</header>',
        '<footer>Footer</footer>',
        '<aside>Sidebar</aside>',
        '<main>Main content</main>',
        '<figure><figcaption>Caption</figcaption></figure>'
      ];

      html5Elements.forEach(element => {
        const tokens = tokenize(element);
        const ast = parse(tokens);

        expect(ast).toBeDefined();
      });
    });

    it('should handle void elements correctly', () => {
      // Start tags only: void elements take no end tag.
      const voidElements = [
        '<area>',
        '<base>',
        '<br>',
        '<col>',
        '<embed>',
        '<hr>',
        '<img>',
        '<input>',
        '<link>',
        '<meta>',
        '<param>',
        '<source>',
        '<track>',
        '<wbr>'
      ];

      voidElements.forEach(element => {
        const tokens = tokenize(element);
        const ast = parse(tokens);

        expect(ast).toBeDefined();
      });
    });
  });

  describe('Foreign Content (SVG/MathML)', () => {
    it('should handle SVG elements', () => {
      const svgHtml = `
<div>
<svg width="100" height="100">
<circle cx="50" cy="50" r="40" stroke="black" fill="red"/>
<rect x="10" y="10" width="80" height="80"/>
<path d="M 10 10 L 90 90"/>
</svg>
</div>
`;

      const tokens = tokenize(svgHtml);
      const ast = parse(tokens);

      expect(ast).toBeDefined();
    });

    it('should handle MathML elements', () => {
      const mathmlHtml = `
<div>
<math>
<mrow>
<mi>x</mi>
<mo>=</mo>
<mfrac>
<mi>a</mi>
<mi>b</mi>
</mfrac>
</mrow>
</math>
</div>
`;

      const tokens = tokenize(mathmlHtml);
      const ast = parse(tokens);

      expect(ast).toBeDefined();
    });
  });

  describe('Character References', () => {
    it('should handle named character references', () => {
      // These must stay in entity form. The decoded characters would not
      // exercise reference handling, and a decoded apostrophe inside single
      // quotes is not even a valid string literal.
      const namedRefs = [
        '&amp;',
        '&lt;',
        '&gt;',
        '&quot;',
        '&apos;',
        '&nbsp;',
        '&copy;',
        '&reg;'
      ];

      namedRefs.forEach(ref => {
        const html = `<p>Test ${ref} reference</p>`;
        const tokens = tokenize(html);
        const ast = parse(tokens);

        expect(ast).toBeDefined();
      });
    });

    it('should handle numeric character references', () => {
      // Decimal and hexadecimal spellings of the same two code points.
      const numericRefs = [
        '&#65;', // A
        '&#x41;', // A (hex)
        '&#8364;', // Euro symbol
        '&#x20AC;' // Euro symbol (hex)
      ];

      numericRefs.forEach(ref => {
        const html = `<p>Test ${ref} reference</p>`;
        const tokens = tokenize(html);
        const ast = parse(tokens);

        expect(ast).toBeDefined();
      });
    });

    it('should handle malformed character references', () => {
      const malformedRefs = [
        '&unknown;', // not a defined entity name
        '&#;', // decimal reference with no digits
        '&#x;', // hex reference with no digits
        '&', // bare ampersand, no reference following
        '&#999999;' // very large decimal value (0xF423F)
      ];

      malformedRefs.forEach(ref => {
        const html = `<p>Test ${ref} reference</p>`;
        const tokens = tokenize(html);
        const ast = parse(tokens);

        expect(ast).toBeDefined();
      });
    });
  });

  describe('Template Elements', () => {
    it('should handle template elements', () => {
      const templateHtml = `
<template id="my-template">
<div class="template-content">
<h2>Template Title</h2>
<p>Template content</p>
</div>
</template>
`;

      const tokens = tokenize(templateHtml);
      const ast = parse(tokens);

      expect(ast).toBeDefined();
    });

    it('should handle nested templates', () => {
      const nestedTemplateHtml = `
<template>
<div>
<template>
<span>Nested template</span>
</template>
</div>
</template>
`;

      const tokens = tokenize(nestedTemplateHtml);
      const ast = parse(tokens);

      expect(ast).toBeDefined();
    });
  });

  describe('Adoption Agency Algorithm', () => {
    it('should handle adoption agency cases', () => {
      // Mis-nested formatting elements; the parser only has to survive them.
      const adoptionCases = [
        '<b><i></b></i>',
        '<p><b><div></b></div></p>',
        '<a><p><a></a></p></a>',
        '<b><table><td></b><i></table>',
        '<font><p>hello<b>cruel</font>world</b>'
      ];

      adoptionCases.forEach(html => {
        const tokens = tokenize(html);
        const ast = parse(tokens);

        expect(ast).toBeDefined();
      });
    });
  });

  describe('Table Parsing', () => {
    it('should handle table structure', () => {
      const tableHtml = `
<table>
<caption>Table Caption</caption>
<colgroup>
<col span="2">
</colgroup>
<thead>
<tr>
<th>Header 1</th>
<th>Header 2</th>
</tr>
</thead>
<tbody>
<tr>
<td>Cell 1</td>
<td>Cell 2</td>
</tr>
</tbody>
<tfoot>
<tr>
<td>Footer 1</td>
<td>Footer 2</td>
</tr>
</tfoot>
</table>
`;

      const tokens = tokenize(tableHtml);
      const ast = parse(tokens);

      expect(ast).toBeDefined();
    });

    it('should handle malformed tables', () => {
      // Content outside cells, a nested table, and an unclosed <tr>.
      const malformedTables = [
        '<table><div>Content outside cells</div><tr><td>Cell</td></tr></table>',
        '<table><tr><td><table><tr><td>Nested</td></tr></table></td></tr></table>',
        '<table><tbody><tr><td>Cell</td></tbody><tbody><tr><td>Cell</td></tr></tbody></table>'
      ];

      malformedTables.forEach(html => {
        const tokens = tokenize(html);
        const ast = parse(tokens);

        expect(ast).toBeDefined();
      });
    });
  });

  describe('Script and Style Elements', () => {
    it('should handle script elements', () => {
      // The first script contains a raw "<" inside its text content.
      const scriptHtml = `
<script>
console.log('Hello World');
var x = 1 < 2;
</script>
<script type="text/javascript">
// Another script
</script>
`;

      const tokens = tokenize(scriptHtml);
      const ast = parse(tokens);

      expect(ast).toBeDefined();
    });

    it('should handle style elements', () => {
      const styleHtml = `
<style>
body { margin: 0; }
.class { color: red; }
/* Comment in CSS */
</style>
`;

      const tokens = tokenize(styleHtml);
      const ast = parse(tokens);

      expect(ast).toBeDefined();
    });
  });
});
|
|
311
|
+
|
|
312
|
+
/**
 * End-to-end check on one realistic page: the document must tokenize and
 * parse into a result with children, and the whole run must finish in under
 * one second.
 */
describe('WPT Integration Tests', () => {
  // Full-page fixture. Its lines are flush-left because everything between
  // the backticks, blank lines included, is part of the input string.
  const realWorldPage = `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Complex HTML Document</title>
<style>
body { font-family: Arial, sans-serif; }
.container { max-width: 1200px; margin: 0 auto; }
</style>
</head>
<body>
<header>
<nav>
<ul>
<li><a href="#home">Home</a></li>
<li><a href="#about">About</a></li>
<li><a href="#contact">Contact</a></li>
</ul>
</nav>
</header>

<main class="container">
<article>
<header>
<h1>Article Title</h1>
<time datetime="2025-07-10">July 10, 2025</time>
</header>

<section>
<p>This is a paragraph with <strong>strong text</strong> and <em>emphasized text</em>.</p>
<p>Here's some code: <code>console.log('Hello');</code></p>

<blockquote cite="https://example.com">
<p>This is a blockquote with a citation.</p>
</blockquote>

<figure>
<img src="image.jpg" alt="Description" loading="lazy">
<figcaption>Image caption</figcaption>
</figure>

<table>
<thead>
<tr>
<th>Column 1</th>
<th>Column 2</th>
</tr>
</thead>
<tbody>
<tr>
<td>Data 1</td>
<td>Data 2</td>
</tr>
</tbody>
</table>
</section>

<footer>
<p>Article footer</p>
</footer>
</article>

<aside>
<h2>Related Articles</h2>
<ul>
<li><a href="/article1">Article 1</a></li>
<li><a href="/article2">Article 2</a></li>
</ul>
</aside>
</main>

<footer>
<p>© 2025 Test Company. All rights reserved.</p>
</footer>

<script>
document.addEventListener('DOMContentLoaded', function() {
console.log('Page loaded');
});
</script>
</body>
</html>
`;

  it('should handle complex real-world HTML', () => {
    // The timed window covers tokenizing and parsing only.
    const startedAt = performance.now();
    const tree = parse(tokenize(realWorldPage));
    const elapsedMs = performance.now() - startedAt;

    expect(tree).toBeDefined();
    expect((tree as any).children?.length).toBeGreaterThan(0);
    // Budget: the full pipeline should complete within 1 second.
    expect(elapsedMs).toBeLessThan(1000);
  });
});
|