@tkeron/html-parser 0.1.7 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -7
- package/bun.lock +5 -0
- package/index.ts +4 -0
- package/package.json +7 -1
- package/src/css-selector.ts +1 -1
- package/src/dom-simulator.ts +38 -16
- package/src/encoding.ts +39 -0
- package/src/index.ts +9 -0
- package/src/parser.ts +478 -144
- package/src/serializer.ts +450 -0
- package/src/tokenizer.ts +59 -43
- package/tests/advanced.test.ts +119 -106
- package/tests/custom-elements.test.ts +172 -162
- package/tests/dom-extended.test.ts +12 -12
- package/tests/dom-manipulation.test.ts +9 -10
- package/tests/dom.test.ts +32 -27
- package/tests/helpers/tokenizer-adapter.test.ts +70 -0
- package/tests/helpers/tokenizer-adapter.ts +65 -0
- package/tests/helpers/tree-adapter.test.ts +39 -0
- package/tests/helpers/tree-adapter.ts +43 -0
- package/tests/html5lib-data/tokenizer/namedEntities.test +42422 -0
- package/tests/html5lib-data/tokenizer/pendingSpecChanges.test +9 -0
- package/tests/html5lib-data/tree-construction/adoption01.dat +354 -0
- package/tests/html5lib-data/tree-construction/adoption02.dat +39 -0
- package/tests/html5lib-data/tree-construction/domjs-unsafe.dat +0 -0
- package/tests/html5lib-data/tree-construction/entities02.dat +309 -0
- package/tests/html5lib-data/tree-construction/html5test-com.dat +301 -0
- package/tests/html5lib-data/tree-construction/math.dat +104 -0
- package/tests/html5lib-data/tree-construction/namespace-sensitivity.dat +22 -0
- package/tests/html5lib-data/tree-construction/noscript01.dat +237 -0
- package/tests/html5lib-data/tree-construction/ruby.dat +302 -0
- package/tests/html5lib-data/tree-construction/scriptdata01.dat +372 -0
- package/tests/html5lib-data/tree-construction/svg.dat +104 -0
- package/tests/html5lib-data/tree-construction/template.dat +1673 -0
- package/tests/html5lib-data/tree-construction/tests10.dat +853 -0
- package/tests/html5lib-data/tree-construction/tests11.dat +523 -0
- package/tests/html5lib-data/tree-construction/tests20.dat +842 -0
- package/tests/html5lib-data/tree-construction/tests21.dat +306 -0
- package/tests/html5lib-data/tree-construction/tests23.dat +168 -0
- package/tests/html5lib-data/tree-construction/tests24.dat +79 -0
- package/tests/html5lib-data/tree-construction/tests5.dat +210 -0
- package/tests/html5lib-data/tree-construction/tests6.dat +663 -0
- package/tests/html5lib-data/tree-construction/tests_innerHTML_1.dat +844 -0
- package/tests/parser.test.ts +172 -193
- package/tests/serializer-core.test.ts +16 -0
- package/tests/serializer-data/core.test +125 -0
- package/tests/serializer-data/injectmeta.test +66 -0
- package/tests/serializer-data/optionaltags.test +965 -0
- package/tests/serializer-data/options.test +60 -0
- package/tests/serializer-data/whitespace.test +51 -0
- package/tests/serializer-injectmeta.test.ts +16 -0
- package/tests/serializer-optionaltags.test.ts +16 -0
- package/tests/serializer-options.test.ts +16 -0
- package/tests/serializer-whitespace.test.ts +16 -0
- package/tests/tokenizer-namedEntities.test.ts +20 -0
- package/tests/tokenizer-pendingSpecChanges.test.ts +20 -0
- package/tests/tokenizer.test.ts +3 -6
- package/tests/tree-construction-adoption01.test.ts +37 -0
- package/tests/tree-construction-adoption02.test.ts +34 -0
- package/tests/tree-construction-domjs-unsafe.test.ts +24 -0
- package/tests/tree-construction-entities02.test.ts +33 -0
- package/tests/tree-construction-html5test-com.test.ts +24 -0
- package/tests/tree-construction-math.test.ts +18 -0
- package/tests/tree-construction-namespace-sensitivity.test.ts +18 -0
- package/tests/tree-construction-noscript01.test.ts +18 -0
- package/tests/tree-construction-ruby.test.ts +21 -0
- package/tests/tree-construction-scriptdata01.test.ts +21 -0
- package/tests/tree-construction-svg.test.ts +21 -0
- package/tests/tree-construction-template.test.ts +21 -0
- package/tests/tree-construction-tests10.test.ts +21 -0
- package/tests/tree-construction-tests11.test.ts +21 -0
- package/tests/tree-construction-tests20.test.ts +18 -0
- package/tests/tree-construction-tests21.test.ts +18 -0
- package/tests/tree-construction-tests23.test.ts +18 -0
- package/tests/tree-construction-tests24.test.ts +18 -0
- package/tests/tree-construction-tests5.test.ts +21 -0
- package/tests/tree-construction-tests6.test.ts +21 -0
- package/tests/tree-construction-tests_innerHTML_1.test.ts +21 -0
- package/tests/official/README.md +0 -87
- package/tests/official/acid/acid-tests.test.ts +0 -309
- package/tests/official/final-output/final-output.test.ts +0 -361
- package/tests/official/html5lib/tokenizer-utils.ts +0 -192
- package/tests/official/html5lib/tokenizer.test.ts +0 -171
- package/tests/official/html5lib/tree-construction-utils.ts +0 -194
- package/tests/official/html5lib/tree-construction.test.ts +0 -250
- package/tests/official/validator/validator-tests.test.ts +0 -237
- package/tests/official/validator-nu/validator-nu.test.ts +0 -335
- package/tests/official/whatwg/whatwg-tests.test.ts +0 -205
- package/tests/official/wpt/wpt-tests.test.ts +0 -409
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { readFileSync } from "fs";
|
|
2
|
+
import { parse } from "../src/index.ts";
|
|
3
|
+
|
|
4
|
+
describe("Tree Construction Tests23 Tests", () => {
|
|
5
|
+
const data = readFileSync("tests/html5lib-data/tree-construction/tests23.dat", "utf8");
|
|
6
|
+
const tests = data.split("#data\n").slice(1);
|
|
7
|
+
|
|
8
|
+
for (const test of tests) {
|
|
9
|
+
const [input, expected] = test.split("#document\n");
|
|
10
|
+
const title = input.trim().split("\n")[0] || "Unnamed test";
|
|
11
|
+
const html = input.trim();
|
|
12
|
+
|
|
13
|
+
it.skip(title, () => {
|
|
14
|
+
const doc = parse(html);
|
|
15
|
+
expect(doc).toBeDefined();
|
|
16
|
+
});
|
|
17
|
+
}
|
|
18
|
+
});
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { readFileSync } from "fs";
|
|
2
|
+
import { parse } from "../src/index.ts";
|
|
3
|
+
|
|
4
|
+
describe("Tree Construction Tests24 Tests", () => {
|
|
5
|
+
const data = readFileSync("tests/html5lib-data/tree-construction/tests24.dat", "utf8");
|
|
6
|
+
const tests = data.split("#data\n").slice(1);
|
|
7
|
+
|
|
8
|
+
for (const test of tests) {
|
|
9
|
+
const [input, expected] = test.split("#document\n");
|
|
10
|
+
const title = input.trim().split("\n")[0] || "Unnamed test";
|
|
11
|
+
const html = input.trim();
|
|
12
|
+
|
|
13
|
+
it.skip(title, () => {
|
|
14
|
+
const doc = parse(html);
|
|
15
|
+
expect(doc).toBeDefined();
|
|
16
|
+
});
|
|
17
|
+
}
|
|
18
|
+
});
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { readFileSync } from "fs";
|
|
2
|
+
import { parse } from "../src/index.ts";
|
|
3
|
+
|
|
4
|
+
describe("Tree Construction Tests5 Tests", () => {
|
|
5
|
+
const content = readFileSync("tests/html5lib-data/tree-construction/tests5.dat", "utf8");
|
|
6
|
+
const sections = content.split("#data\n");
|
|
7
|
+
|
|
8
|
+
for (let i = 1; i < sections.length; i++) {
|
|
9
|
+
const section = sections[i];
|
|
10
|
+
const [dataPart, documentPart] = section.split("#document\n");
|
|
11
|
+
const data = dataPart.trim();
|
|
12
|
+
const expectedDocument = documentPart ? documentPart.split("#errors\n")[0].trim() : "";
|
|
13
|
+
const errors = documentPart && documentPart.includes("#errors\n") ? documentPart.split("#errors\n")[1].trim() : "";
|
|
14
|
+
|
|
15
|
+
it(`Tests5 test ${i}`, () => {
|
|
16
|
+
const doc = parse(data);
|
|
17
|
+
expect(doc).toBeDefined();
|
|
18
|
+
// TODO: Implement DOM serialization and comparison
|
|
19
|
+
});
|
|
20
|
+
}
|
|
21
|
+
});
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { readFileSync } from "fs";
|
|
2
|
+
import { parse } from "../src/index.ts";
|
|
3
|
+
|
|
4
|
+
describe("Tree Construction Tests6 Tests", () => {
|
|
5
|
+
const content = readFileSync("tests/html5lib-data/tree-construction/tests6.dat", "utf8");
|
|
6
|
+
const sections = content.split("#data\n");
|
|
7
|
+
|
|
8
|
+
for (let i = 1; i < sections.length; i++) {
|
|
9
|
+
const section = sections[i];
|
|
10
|
+
const [dataPart, documentPart] = section.split("#document\n");
|
|
11
|
+
const data = dataPart.trim();
|
|
12
|
+
const expectedDocument = documentPart ? documentPart.split("#errors\n")[0].trim() : "";
|
|
13
|
+
const errors = documentPart && documentPart.includes("#errors\n") ? documentPart.split("#errors\n")[1].trim() : "";
|
|
14
|
+
|
|
15
|
+
it(`Tests6 test ${i}`, () => {
|
|
16
|
+
const doc = parse(data);
|
|
17
|
+
expect(doc).toBeDefined();
|
|
18
|
+
// TODO: Implement DOM serialization and comparison
|
|
19
|
+
});
|
|
20
|
+
}
|
|
21
|
+
});
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { readFileSync } from "fs";
|
|
2
|
+
import { parse } from "../src/index.ts";
|
|
3
|
+
|
|
4
|
+
describe("Tree Construction Tests_innerHTML_1 Tests", () => {
|
|
5
|
+
const content = readFileSync("tests/html5lib-data/tree-construction/tests_innerHTML_1.dat", "utf8");
|
|
6
|
+
const sections = content.split("#data\n");
|
|
7
|
+
|
|
8
|
+
for (let i = 1; i < sections.length; i++) {
|
|
9
|
+
const section = sections[i];
|
|
10
|
+
const [dataPart, documentPart] = section.split("#document\n");
|
|
11
|
+
const data = dataPart.trim();
|
|
12
|
+
const expectedDocument = documentPart ? documentPart.split("#errors\n")[0].trim() : "";
|
|
13
|
+
const errors = documentPart && documentPart.includes("#errors\n") ? documentPart.split("#errors\n")[1].trim() : "";
|
|
14
|
+
|
|
15
|
+
it(`Tests_innerHTML_1 test ${i}`, () => {
|
|
16
|
+
const doc = parse(data);
|
|
17
|
+
expect(doc).toBeDefined();
|
|
18
|
+
// TODO: Implement DOM serialization and comparison
|
|
19
|
+
});
|
|
20
|
+
}
|
|
21
|
+
});
|
package/tests/official/README.md
DELETED
|
@@ -1,87 +0,0 @@
|
|
|
1
|
-
# Official HTML Parser Tests
|
|
2
|
-
|
|
3
|
-
This directory contains implementations of official HTML parsing test suites to ensure compliance with web standards.
|
|
4
|
-
|
|
5
|
-
## Test Sources
|
|
6
|
-
|
|
7
|
-
### HTML5lib Tests
|
|
8
|
-
- **Tokenizer Tests**: JSON format tests from `html5lib-tests/tokenizer/`
|
|
9
|
-
- **Tree Construction Tests**: DAT format tests from `html5lib-tests/tree-construction/`
|
|
10
|
-
|
|
11
|
-
### Web Platform Tests (WPT)
|
|
12
|
-
- **Parsing Tests**: HTML format tests from `wpt/html/syntax/parsing/`
|
|
13
|
-
|
|
14
|
-
### Benchmark/Compliance Tests
|
|
15
|
-
- **Acid Tests**: Standardized rendering tests (Acid1, Acid2, Acid3)
|
|
16
|
-
- **HTML5 Test Suite**: Comprehensive HTML5 compliance tests
|
|
17
|
-
|
|
18
|
-
## Test Structure
|
|
19
|
-
|
|
20
|
-
```
|
|
21
|
-
tests/official/
|
|
22
|
-
├── html5lib/
|
|
23
|
-
│ ├── tokenizer/ # JSON tokenizer tests
|
|
24
|
-
│ ├── tree-construction/ # DAT tree construction tests
|
|
25
|
-
│ └── utils/ # HTML5lib test utilities
|
|
26
|
-
├── wpt/ # Web Platform Tests
|
|
27
|
-
├── acid/ # Acid tests
|
|
28
|
-
├── benchmarks/ # Performance benchmarks
|
|
29
|
-
└── compliance/ # Compliance test results
|
|
30
|
-
```
|
|
31
|
-
|
|
32
|
-
## Test Formats
|
|
33
|
-
|
|
34
|
-
### HTML5lib Tokenizer Tests (JSON)
|
|
35
|
-
```json
|
|
36
|
-
{
|
|
37
|
-
"tests": [
|
|
38
|
-
{
|
|
39
|
-
"description": "Test description",
|
|
40
|
-
"input": "input_string",
|
|
41
|
-
"output": [expected_output_tokens],
|
|
42
|
-
"initialStates": [initial_states],
|
|
43
|
-
"lastStartTag": "last_start_tag",
|
|
44
|
-
"errors": [parse_errors]
|
|
45
|
-
}
|
|
46
|
-
]
|
|
47
|
-
}
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
### HTML5lib Tree Construction Tests (DAT)
|
|
51
|
-
```
|
|
52
|
-
#data
|
|
53
|
-
<html>
|
|
54
|
-
#errors
|
|
55
|
-
(1,6): expected-doctype-but-got-start-tag
|
|
56
|
-
#document
|
|
57
|
-
| <html>
|
|
58
|
-
| <head>
|
|
59
|
-
| <body>
|
|
60
|
-
```
|
|
61
|
-
|
|
62
|
-
### Web Platform Tests (HTML)
|
|
63
|
-
Standard HTML files with embedded test assertions and expected results.
|
|
64
|
-
|
|
65
|
-
## Usage
|
|
66
|
-
|
|
67
|
-
```bash
|
|
68
|
-
# Run all official tests
|
|
69
|
-
bun test tests/official/
|
|
70
|
-
|
|
71
|
-
# Run specific test suite
|
|
72
|
-
bun test tests/official/html5lib/
|
|
73
|
-
bun test tests/official/wpt/
|
|
74
|
-
bun test tests/official/acid/
|
|
75
|
-
|
|
76
|
-
# Run with coverage
|
|
77
|
-
bun test --coverage tests/official/
|
|
78
|
-
```
|
|
79
|
-
|
|
80
|
-
## Test Results
|
|
81
|
-
|
|
82
|
-
Results are automatically generated and stored in `tests/official/compliance/` with detailed reports on:
|
|
83
|
-
- Tokenizer compliance
|
|
84
|
-
- Tree construction compliance
|
|
85
|
-
- Error handling accuracy
|
|
86
|
-
- Performance benchmarks
|
|
87
|
-
- Standards compliance scores
|
|
@@ -1,309 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect } from 'bun:test';
|
|
2
|
-
import { tokenize } from '../../../src/tokenizer';
|
|
3
|
-
import { parse } from '../../../src/parser';
|
|
4
|
-
|
|
5
|
-
describe('Acid Tests Compliance', () => {
|
|
6
|
-
describe('Acid1 Test', () => {
|
|
7
|
-
it('should parse basic HTML structure correctly', () => {
|
|
8
|
-
const acid1Html = `
|
|
9
|
-
<!DOCTYPE html>
|
|
10
|
-
<html>
|
|
11
|
-
<head>
|
|
12
|
-
<title>Acid1 Test</title>
|
|
13
|
-
</head>
|
|
14
|
-
<body>
|
|
15
|
-
<div>
|
|
16
|
-
<p>Hello <b>World</b></p>
|
|
17
|
-
<table>
|
|
18
|
-
<tr>
|
|
19
|
-
<td>Cell 1</td>
|
|
20
|
-
<td>Cell 2</td>
|
|
21
|
-
</tr>
|
|
22
|
-
</table>
|
|
23
|
-
</div>
|
|
24
|
-
</body>
|
|
25
|
-
</html>
|
|
26
|
-
`;
|
|
27
|
-
|
|
28
|
-
const tokens = tokenize(acid1Html);
|
|
29
|
-
const ast = parse(tokens);
|
|
30
|
-
|
|
31
|
-
expect(ast).toBeDefined();
|
|
32
|
-
expect((ast as any).type).toBe('DOCUMENT');
|
|
33
|
-
expect((ast as any).children?.length).toBeGreaterThan(0);
|
|
34
|
-
});
|
|
35
|
-
|
|
36
|
-
it('should handle nested elements', () => {
|
|
37
|
-
const nestedHtml = `
|
|
38
|
-
<div>
|
|
39
|
-
<p>Text <strong>bold <em>italic</em></strong> more text</p>
|
|
40
|
-
</div>
|
|
41
|
-
`;
|
|
42
|
-
|
|
43
|
-
const tokens = tokenize(nestedHtml);
|
|
44
|
-
const ast = parse(tokens);
|
|
45
|
-
|
|
46
|
-
expect(ast).toBeDefined();
|
|
47
|
-
expect(ast.children?.length).toBeGreaterThan(0);
|
|
48
|
-
});
|
|
49
|
-
|
|
50
|
-
it('should handle self-closing tags', () => {
|
|
51
|
-
const selfClosingHtml = `
|
|
52
|
-
<div>
|
|
53
|
-
<img src="test.jpg" alt="test">
|
|
54
|
-
<br>
|
|
55
|
-
<hr>
|
|
56
|
-
</div>
|
|
57
|
-
`;
|
|
58
|
-
|
|
59
|
-
const tokens = tokenize(selfClosingHtml);
|
|
60
|
-
const ast = parse(tokens);
|
|
61
|
-
|
|
62
|
-
expect(ast).toBeDefined();
|
|
63
|
-
});
|
|
64
|
-
});
|
|
65
|
-
|
|
66
|
-
describe('Acid2 Test', () => {
|
|
67
|
-
it('should handle CSS and more complex HTML', () => {
|
|
68
|
-
const acid2Html = `
|
|
69
|
-
<!DOCTYPE html>
|
|
70
|
-
<html>
|
|
71
|
-
<head>
|
|
72
|
-
<style>
|
|
73
|
-
body { margin: 0; }
|
|
74
|
-
.test { color: red; }
|
|
75
|
-
</style>
|
|
76
|
-
</head>
|
|
77
|
-
<body>
|
|
78
|
-
<div class="test">
|
|
79
|
-
<span>Styled text</span>
|
|
80
|
-
</div>
|
|
81
|
-
</body>
|
|
82
|
-
</html>
|
|
83
|
-
`;
|
|
84
|
-
|
|
85
|
-
const tokens = tokenize(acid2Html);
|
|
86
|
-
const ast = parse(tokens);
|
|
87
|
-
|
|
88
|
-
expect(ast).toBeDefined();
|
|
89
|
-
expect((ast as any).type).toBe('DOCUMENT');
|
|
90
|
-
});
|
|
91
|
-
|
|
92
|
-
it('should handle complex table structures', () => {
|
|
93
|
-
const complexTable = `
|
|
94
|
-
<table>
|
|
95
|
-
<thead>
|
|
96
|
-
<tr>
|
|
97
|
-
<th colspan="2">Header</th>
|
|
98
|
-
</tr>
|
|
99
|
-
</thead>
|
|
100
|
-
<tbody>
|
|
101
|
-
<tr>
|
|
102
|
-
<td rowspan="2">Cell 1</td>
|
|
103
|
-
<td>Cell 2</td>
|
|
104
|
-
</tr>
|
|
105
|
-
<tr>
|
|
106
|
-
<td>Cell 3</td>
|
|
107
|
-
</tr>
|
|
108
|
-
</tbody>
|
|
109
|
-
</table>
|
|
110
|
-
`;
|
|
111
|
-
|
|
112
|
-
const tokens = tokenize(complexTable);
|
|
113
|
-
const ast = parse(tokens);
|
|
114
|
-
|
|
115
|
-
expect(ast).toBeDefined();
|
|
116
|
-
});
|
|
117
|
-
});
|
|
118
|
-
|
|
119
|
-
describe('Acid3 Test', () => {
|
|
120
|
-
it('should handle advanced HTML5 features', () => {
|
|
121
|
-
const acid3Html = `
|
|
122
|
-
<!DOCTYPE html>
|
|
123
|
-
<html>
|
|
124
|
-
<head>
|
|
125
|
-
<meta charset="UTF-8">
|
|
126
|
-
<title>Acid3 Test</title>
|
|
127
|
-
</head>
|
|
128
|
-
<body>
|
|
129
|
-
<article>
|
|
130
|
-
<header>
|
|
131
|
-
<h1>Article Title</h1>
|
|
132
|
-
</header>
|
|
133
|
-
<section>
|
|
134
|
-
<p>Article content</p>
|
|
135
|
-
</section>
|
|
136
|
-
<footer>
|
|
137
|
-
<p>Footer content</p>
|
|
138
|
-
</footer>
|
|
139
|
-
</article>
|
|
140
|
-
</body>
|
|
141
|
-
</html>
|
|
142
|
-
`;
|
|
143
|
-
|
|
144
|
-
const tokens = tokenize(acid3Html);
|
|
145
|
-
const ast = parse(tokens);
|
|
146
|
-
|
|
147
|
-
expect(ast).toBeDefined();
|
|
148
|
-
expect((ast as any).type).toBe('DOCUMENT');
|
|
149
|
-
});
|
|
150
|
-
|
|
151
|
-
it('should handle HTML5 semantic elements', () => {
|
|
152
|
-
const semanticHtml = `
|
|
153
|
-
<main>
|
|
154
|
-
<nav>
|
|
155
|
-
<ul>
|
|
156
|
-
<li><a href="#home">Home</a></li>
|
|
157
|
-
<li><a href="#about">About</a></li>
|
|
158
|
-
</ul>
|
|
159
|
-
</nav>
|
|
160
|
-
<aside>
|
|
161
|
-
<p>Sidebar content</p>
|
|
162
|
-
</aside>
|
|
163
|
-
</main>
|
|
164
|
-
`;
|
|
165
|
-
|
|
166
|
-
const tokens = tokenize(semanticHtml);
|
|
167
|
-
const ast = parse(tokens);
|
|
168
|
-
|
|
169
|
-
expect(ast).toBeDefined();
|
|
170
|
-
});
|
|
171
|
-
});
|
|
172
|
-
});
|
|
173
|
-
|
|
174
|
-
describe('Quirks Mode Tests', () => {
|
|
175
|
-
it('should handle quirks mode HTML', () => {
|
|
176
|
-
const quirksHtml = `
|
|
177
|
-
<html>
|
|
178
|
-
<body>
|
|
179
|
-
<div>
|
|
180
|
-
<p>No DOCTYPE - should trigger quirks mode
|
|
181
|
-
<p>Unclosed paragraphs
|
|
182
|
-
<div>Nested without proper closing
|
|
183
|
-
</div>
|
|
184
|
-
</body>
|
|
185
|
-
</html>
|
|
186
|
-
`;
|
|
187
|
-
|
|
188
|
-
const tokens = tokenize(quirksHtml);
|
|
189
|
-
const ast = parse(tokens);
|
|
190
|
-
|
|
191
|
-
expect(ast).toBeDefined();
|
|
192
|
-
expect((ast as any).type).toBe('DOCUMENT');
|
|
193
|
-
});
|
|
194
|
-
|
|
195
|
-
it('should handle malformed HTML gracefully', () => {
|
|
196
|
-
const malformedHtml = `
|
|
197
|
-
<div>
|
|
198
|
-
<p>Unclosed paragraph
|
|
199
|
-
<span>Unclosed span
|
|
200
|
-
<b>Bold text
|
|
201
|
-
<i>Italic text
|
|
202
|
-
</div>
|
|
203
|
-
`;
|
|
204
|
-
|
|
205
|
-
const tokens = tokenize(malformedHtml);
|
|
206
|
-
const ast = parse(tokens);
|
|
207
|
-
|
|
208
|
-
expect(ast).toBeDefined();
|
|
209
|
-
});
|
|
210
|
-
|
|
211
|
-
it('should handle mismatched tags', () => {
|
|
212
|
-
const mismatchedHtml = `
|
|
213
|
-
<div>
|
|
214
|
-
<p>Paragraph</div>
|
|
215
|
-
<span>Span</p>
|
|
216
|
-
</span>
|
|
217
|
-
`;
|
|
218
|
-
|
|
219
|
-
const tokens = tokenize(mismatchedHtml);
|
|
220
|
-
const ast = parse(tokens);
|
|
221
|
-
|
|
222
|
-
expect(ast).toBeDefined();
|
|
223
|
-
});
|
|
224
|
-
});
|
|
225
|
-
|
|
226
|
-
describe('Performance Benchmarks', () => {
|
|
227
|
-
it('should parse small HTML quickly', () => {
|
|
228
|
-
const smallHtml = '<div><p>Hello World</p></div>';
|
|
229
|
-
|
|
230
|
-
const start = performance.now();
|
|
231
|
-
const tokens = tokenize(smallHtml);
|
|
232
|
-
const ast = parse(tokens);
|
|
233
|
-
const end = performance.now();
|
|
234
|
-
|
|
235
|
-
expect(ast).toBeDefined();
|
|
236
|
-
expect(end - start).toBeLessThan(10);
|
|
237
|
-
});
|
|
238
|
-
|
|
239
|
-
it('should handle medium-sized HTML', () => {
|
|
240
|
-
const mediumHtml = Array(100).fill('<div><p>Content</p></div>').join('');
|
|
241
|
-
|
|
242
|
-
const start = performance.now();
|
|
243
|
-
const tokens = tokenize(mediumHtml);
|
|
244
|
-
const ast = parse(tokens);
|
|
245
|
-
const end = performance.now();
|
|
246
|
-
|
|
247
|
-
expect(ast).toBeDefined();
|
|
248
|
-
expect(end - start).toBeLessThan(100);
|
|
249
|
-
});
|
|
250
|
-
|
|
251
|
-
it('should handle large HTML documents', () => {
|
|
252
|
-
const largeHtml = Array(1000).fill('<div><p>Large content</p></div>').join('');
|
|
253
|
-
|
|
254
|
-
const start = performance.now();
|
|
255
|
-
const tokens = tokenize(largeHtml);
|
|
256
|
-
const ast = parse(tokens);
|
|
257
|
-
const end = performance.now();
|
|
258
|
-
|
|
259
|
-
expect(ast).toBeDefined();
|
|
260
|
-
expect(end - start).toBeLessThan(1000);
|
|
261
|
-
});
|
|
262
|
-
|
|
263
|
-
it('should handle deeply nested HTML', () => {
|
|
264
|
-
let deepHtml = '';
|
|
265
|
-
for (let i = 0; i < 100; i++) {
|
|
266
|
-
deepHtml += '<div>';
|
|
267
|
-
}
|
|
268
|
-
deepHtml += 'Deep content';
|
|
269
|
-
for (let i = 0; i < 100; i++) {
|
|
270
|
-
deepHtml += '</div>';
|
|
271
|
-
}
|
|
272
|
-
|
|
273
|
-
const start = performance.now();
|
|
274
|
-
const tokens = tokenize(deepHtml);
|
|
275
|
-
const ast = parse(tokens);
|
|
276
|
-
const end = performance.now();
|
|
277
|
-
|
|
278
|
-
expect(ast).toBeDefined();
|
|
279
|
-
expect(end - start).toBeLessThan(500);
|
|
280
|
-
});
|
|
281
|
-
});
|
|
282
|
-
|
|
283
|
-
describe('Memory Usage Tests', () => {
|
|
284
|
-
it('should not leak memory on repeated parsing', () => {
|
|
285
|
-
const testHtml = '<div><p>Memory test</p></div>';
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
for (let i = 0; i < 1000; i++) {
|
|
289
|
-
const tokens = tokenize(testHtml);
|
|
290
|
-
const ast = parse(tokens);
|
|
291
|
-
expect(ast).toBeDefined();
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
expect(true).toBe(true);
|
|
296
|
-
});
|
|
297
|
-
|
|
298
|
-
it('should handle multiple large documents', () => {
|
|
299
|
-
const largeHtml = Array(500).fill('<div><p>Large content</p></div>').join('');
|
|
300
|
-
|
|
301
|
-
for (let i = 0; i < 10; i++) {
|
|
302
|
-
const tokens = tokenize(largeHtml);
|
|
303
|
-
const ast = parse(tokens);
|
|
304
|
-
expect(ast).toBeDefined();
|
|
305
|
-
}
|
|
306
|
-
|
|
307
|
-
expect(true).toBe(true);
|
|
308
|
-
});
|
|
309
|
-
});
|