@tkeron/html-parser 0.1.7 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -7
- package/bun.lock +5 -0
- package/index.ts +4 -0
- package/package.json +7 -1
- package/src/css-selector.ts +1 -1
- package/src/dom-simulator.ts +41 -17
- package/src/encoding.ts +39 -0
- package/src/index.ts +9 -0
- package/src/parser.ts +509 -143
- package/src/serializer.ts +450 -0
- package/src/tokenizer.ts +190 -118
- package/tests/advanced.test.ts +121 -108
- package/tests/custom-elements-head.test.ts +105 -0
- package/tests/dom-extended.test.ts +12 -12
- package/tests/dom-manipulation.test.ts +9 -10
- package/tests/dom.test.ts +32 -27
- package/tests/helpers/tokenizer-adapter.test.ts +70 -0
- package/tests/helpers/tokenizer-adapter.ts +65 -0
- package/tests/helpers/tree-adapter.test.ts +39 -0
- package/tests/helpers/tree-adapter.ts +60 -0
- package/tests/html5lib-data/tokenizer/namedEntities.test +42422 -0
- package/tests/html5lib-data/tokenizer/pendingSpecChanges.test +9 -0
- package/tests/html5lib-data/tree-construction/adoption01.dat +354 -0
- package/tests/html5lib-data/tree-construction/adoption02.dat +39 -0
- package/tests/html5lib-data/tree-construction/domjs-unsafe.dat +0 -0
- package/tests/html5lib-data/tree-construction/entities02.dat +309 -0
- package/tests/html5lib-data/tree-construction/html5test-com.dat +301 -0
- package/tests/html5lib-data/tree-construction/math.dat +104 -0
- package/tests/html5lib-data/tree-construction/namespace-sensitivity.dat +22 -0
- package/tests/html5lib-data/tree-construction/noscript01.dat +237 -0
- package/tests/html5lib-data/tree-construction/ruby.dat +302 -0
- package/tests/html5lib-data/tree-construction/scriptdata01.dat +372 -0
- package/tests/html5lib-data/tree-construction/svg.dat +104 -0
- package/tests/html5lib-data/tree-construction/template.dat +1673 -0
- package/tests/html5lib-data/tree-construction/tests10.dat +853 -0
- package/tests/html5lib-data/tree-construction/tests11.dat +523 -0
- package/tests/html5lib-data/tree-construction/tests20.dat +842 -0
- package/tests/html5lib-data/tree-construction/tests21.dat +306 -0
- package/tests/html5lib-data/tree-construction/tests23.dat +168 -0
- package/tests/html5lib-data/tree-construction/tests24.dat +79 -0
- package/tests/html5lib-data/tree-construction/tests5.dat +210 -0
- package/tests/html5lib-data/tree-construction/tests6.dat +663 -0
- package/tests/html5lib-data/tree-construction/tests_innerHTML_1.dat +844 -0
- package/tests/parser.test.ts +173 -193
- package/tests/serializer-core.test.ts +16 -0
- package/tests/serializer-data/core.test +125 -0
- package/tests/serializer-data/injectmeta.test +66 -0
- package/tests/serializer-data/optionaltags.test +965 -0
- package/tests/serializer-data/options.test +60 -0
- package/tests/serializer-data/whitespace.test +51 -0
- package/tests/serializer-injectmeta.test.ts +16 -0
- package/tests/serializer-optionaltags.test.ts +16 -0
- package/tests/serializer-options.test.ts +16 -0
- package/tests/serializer-whitespace.test.ts +16 -0
- package/tests/tokenizer-namedEntities.test.ts +20 -0
- package/tests/tokenizer-pendingSpecChanges.test.ts +20 -0
- package/tests/tokenizer.test.ts +25 -32
- package/tests/tree-construction-adoption01.test.ts +37 -0
- package/tests/tree-construction-adoption02.test.ts +34 -0
- package/tests/tree-construction-domjs-unsafe.test.ts +24 -0
- package/tests/tree-construction-entities02.test.ts +33 -0
- package/tests/tree-construction-html5test-com.test.ts +32 -0
- package/tests/tree-construction-math.test.ts +18 -0
- package/tests/tree-construction-namespace-sensitivity.test.ts +18 -0
- package/tests/tree-construction-noscript01.test.ts +18 -0
- package/tests/tree-construction-ruby.test.ts +21 -0
- package/tests/tree-construction-scriptdata01.test.ts +21 -0
- package/tests/tree-construction-svg.test.ts +21 -0
- package/tests/tree-construction-template.test.ts +21 -0
- package/tests/tree-construction-tests10.test.ts +21 -0
- package/tests/tree-construction-tests11.test.ts +21 -0
- package/tests/tree-construction-tests20.test.ts +18 -0
- package/tests/tree-construction-tests21.test.ts +18 -0
- package/tests/tree-construction-tests23.test.ts +18 -0
- package/tests/tree-construction-tests24.test.ts +18 -0
- package/tests/tree-construction-tests5.test.ts +21 -0
- package/tests/tree-construction-tests6.test.ts +21 -0
- package/tests/tree-construction-tests_innerHTML_1.test.ts +21 -0
- package/tests/custom-elements.test.ts +0 -745
- package/tests/official/README.md +0 -87
- package/tests/official/acid/acid-tests.test.ts +0 -309
- package/tests/official/final-output/final-output.test.ts +0 -361
- package/tests/official/html5lib/tokenizer-utils.ts +0 -192
- package/tests/official/html5lib/tokenizer.test.ts +0 -171
- package/tests/official/html5lib/tree-construction-utils.ts +0 -194
- package/tests/official/html5lib/tree-construction.test.ts +0 -250
- package/tests/official/validator/validator-tests.test.ts +0 -237
- package/tests/official/validator-nu/validator-nu.test.ts +0 -335
- package/tests/official/whatwg/whatwg-tests.test.ts +0 -205
- package/tests/official/wpt/wpt-tests.test.ts +0 -409
|
@@ -1,171 +0,0 @@
|
|
|
1
|
-
import { describe, it } from 'bun:test';
|
|
2
|
-
import {
|
|
3
|
-
loadHTML5libTokenizerTests,
|
|
4
|
-
runHTML5libTokenizerTestSuite,
|
|
5
|
-
type HTML5libTokenizerTestSuite
|
|
6
|
-
} from './tokenizer-utils';
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
const basicTokenizerTests: HTML5libTokenizerTestSuite = {
|
|
10
|
-
"tests": [
|
|
11
|
-
{
|
|
12
|
-
"description": "Correct Doctype lowercase",
|
|
13
|
-
"input": "<!DOCTYPE html>",
|
|
14
|
-
"output": [["DOCTYPE", "html", null, null, true]]
|
|
15
|
-
},
|
|
16
|
-
{
|
|
17
|
-
"description": "Correct Doctype uppercase",
|
|
18
|
-
"input": "<!DOCTYPE HTML>",
|
|
19
|
-
"output": [["DOCTYPE", "html", null, null, true]]
|
|
20
|
-
},
|
|
21
|
-
{
|
|
22
|
-
"description": "Single Start Tag",
|
|
23
|
-
"input": "<h>",
|
|
24
|
-
"output": [["StartTag", "h", {}]]
|
|
25
|
-
},
|
|
26
|
-
{
|
|
27
|
-
"description": "Start Tag w/attribute",
|
|
28
|
-
"input": "<h a='b'>",
|
|
29
|
-
"output": [["StartTag", "h", { "a": "b" }]]
|
|
30
|
-
},
|
|
31
|
-
{
|
|
32
|
-
"description": "Start/End Tag",
|
|
33
|
-
"input": "<h></h>",
|
|
34
|
-
"output": [["StartTag", "h", {}], ["EndTag", "h"]]
|
|
35
|
-
},
|
|
36
|
-
{
|
|
37
|
-
"description": "Simple comment",
|
|
38
|
-
"input": "<!--comment-->",
|
|
39
|
-
"output": [["Comment", "comment"]]
|
|
40
|
-
},
|
|
41
|
-
{
|
|
42
|
-
"description": "Character data",
|
|
43
|
-
"input": "Hello World",
|
|
44
|
-
"output": [["Character", "Hello World"]]
|
|
45
|
-
},
|
|
46
|
-
{
|
|
47
|
-
"description": "Multiple attributes",
|
|
48
|
-
"input": "<h a='b' c='d'>",
|
|
49
|
-
"output": [["StartTag", "h", { "a": "b", "c": "d" }]]
|
|
50
|
-
},
|
|
51
|
-
{
|
|
52
|
-
"description": "Self-closing tag",
|
|
53
|
-
"input": "<br/>",
|
|
54
|
-
"output": [["StartTag", "br", {}, true]]
|
|
55
|
-
},
|
|
56
|
-
{
|
|
57
|
-
"description": "Empty comment",
|
|
58
|
-
"input": "<!---->",
|
|
59
|
-
"output": [["Comment", ""]]
|
|
60
|
-
},
|
|
61
|
-
{
|
|
62
|
-
"description": "Text with entities",
|
|
63
|
-
"input": "&amp;&lt;&gt;",
|
|
64
|
-
"output": [["Character", "&<>"]]
|
|
65
|
-
},
|
|
66
|
-
{
|
|
67
|
-
"description": "Numeric entity",
|
|
68
|
-
"input": "&#65;",
|
|
69
|
-
"output": [["Character", "A"]]
|
|
70
|
-
},
|
|
71
|
-
{
|
|
72
|
-
"description": "Hex entity",
|
|
73
|
-
"input": "&#x41;",
|
|
74
|
-
"output": [["Character", "A"]]
|
|
75
|
-
},
|
|
76
|
-
{
|
|
77
|
-
"description": "Unquoted attribute",
|
|
78
|
-
"input": "<h a=b>",
|
|
79
|
-
"output": [["StartTag", "h", { "a": "b" }]]
|
|
80
|
-
},
|
|
81
|
-
{
|
|
82
|
-
"description": "Tag with mixed case",
|
|
83
|
-
"input": "<DiV>",
|
|
84
|
-
"output": [["StartTag", "div", {}]]
|
|
85
|
-
}
|
|
86
|
-
]
|
|
87
|
-
};
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
const entityTests: HTML5libTokenizerTestSuite = {
|
|
91
|
-
"tests": [
|
|
92
|
-
{
|
|
93
|
-
"description": "Entity with trailing semicolon",
|
|
94
|
-
"input": "I'm &not;it",
|
|
95
|
-
"output": [["Character", "I'm ¬it"]]
|
|
96
|
-
},
|
|
97
|
-
{
|
|
98
|
-
"description": "Entity without trailing semicolon",
|
|
99
|
-
"input": "I'm &notit",
|
|
100
|
-
"output": [["Character", "I'm ¬it"]],
|
|
101
|
-
"errors": [
|
|
102
|
-
{ "code": "missing-semicolon-after-character-reference", "line": 1, "col": 9 }
|
|
103
|
-
]
|
|
104
|
-
},
|
|
105
|
-
{
|
|
106
|
-
"description": "Ampersand EOF",
|
|
107
|
-
"input": "&",
|
|
108
|
-
"output": [["Character", "&"]]
|
|
109
|
-
},
|
|
110
|
-
{
|
|
111
|
-
"description": "Unfinished entity",
|
|
112
|
-
"input": "&f",
|
|
113
|
-
"output": [["Character", "&f"]]
|
|
114
|
-
},
|
|
115
|
-
{
|
|
116
|
-
"description": "Ampersand, number sign",
|
|
117
|
-
"input": "&#",
|
|
118
|
-
"output": [["Character", "&#"]],
|
|
119
|
-
"errors": [
|
|
120
|
-
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 }
|
|
121
|
-
]
|
|
122
|
-
}
|
|
123
|
-
]
|
|
124
|
-
};
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
const commentTests: HTML5libTokenizerTestSuite = {
|
|
128
|
-
"tests": [
|
|
129
|
-
{
|
|
130
|
-
"description": "Comment, Central dash no space",
|
|
131
|
-
"input": "<!----->",
|
|
132
|
-
"output": [["Comment", "-"]]
|
|
133
|
-
},
|
|
134
|
-
{
|
|
135
|
-
"description": "Comment, two central dashes",
|
|
136
|
-
"input": "<!-- --comment -->",
|
|
137
|
-
"output": [["Comment", " --comment "]]
|
|
138
|
-
},
|
|
139
|
-
{
|
|
140
|
-
"description": "Unfinished comment",
|
|
141
|
-
"input": "<!--comment",
|
|
142
|
-
"output": [["Comment", "comment"]],
|
|
143
|
-
"errors": [
|
|
144
|
-
{ "code": "eof-in-comment", "line": 1, "col": 12 }
|
|
145
|
-
]
|
|
146
|
-
},
|
|
147
|
-
{
|
|
148
|
-
"description": "Short comment",
|
|
149
|
-
"input": "<!-->",
|
|
150
|
-
"output": [["Comment", ""]],
|
|
151
|
-
"errors": [
|
|
152
|
-
{ "code": "abrupt-closing-of-empty-comment", "line": 1, "col": 5 }
|
|
153
|
-
]
|
|
154
|
-
},
|
|
155
|
-
{
|
|
156
|
-
"description": "Nested comment",
|
|
157
|
-
"input": "<!-- <!--test-->",
|
|
158
|
-
"output": [["Comment", " <!--test"]],
|
|
159
|
-
"errors": [
|
|
160
|
-
{ "code": "nested-comment", "line": 1, "col": 10 }
|
|
161
|
-
]
|
|
162
|
-
}
|
|
163
|
-
]
|
|
164
|
-
};
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
describe('HTML5lib Tokenizer Tests', () => {
|
|
168
|
-
runHTML5libTokenizerTestSuite(basicTokenizerTests, 'Basic Tokenizer');
|
|
169
|
-
runHTML5libTokenizerTestSuite(entityTests, 'Entity Handling');
|
|
170
|
-
runHTML5libTokenizerTestSuite(commentTests, 'Comment Handling');
|
|
171
|
-
});
|
|
@@ -1,194 +0,0 @@
|
|
|
1
|
-
import { expect, describe, it } from 'bun:test';
|
|
2
|
-
import { parse } from '../../../src/parser';
|
|
3
|
-
import { tokenize } from '../../../src/tokenizer';
|
|
4
|
-
import type { ASTNode } from '../../../src/parser';
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
export interface HTML5libTreeTest {
|
|
8
|
-
data: string;
|
|
9
|
-
errors: string[];
|
|
10
|
-
newErrors?: string[];
|
|
11
|
-
documentFragment?: string;
|
|
12
|
-
scriptOff?: boolean;
|
|
13
|
-
scriptOn?: boolean;
|
|
14
|
-
document: string;
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
export function parseHTML5libDATFile(content: string): HTML5libTreeTest[] {
|
|
19
|
-
const tests: HTML5libTreeTest[] = [];
|
|
20
|
-
const sections = content.split('\n\n').filter(section => section.trim());
|
|
21
|
-
|
|
22
|
-
for (const section of sections) {
|
|
23
|
-
const lines = section.split('\n');
|
|
24
|
-
const test: Partial<HTML5libTreeTest> = {
|
|
25
|
-
errors: []
|
|
26
|
-
};
|
|
27
|
-
|
|
28
|
-
let currentSection = '';
|
|
29
|
-
let currentContent: string[] = [];
|
|
30
|
-
|
|
31
|
-
for (const line of lines) {
|
|
32
|
-
if (line.startsWith('#')) {
|
|
33
|
-
|
|
34
|
-
if (currentSection) {
|
|
35
|
-
switch (currentSection) {
|
|
36
|
-
case 'data':
|
|
37
|
-
test.data = currentContent.join('\n');
|
|
38
|
-
break;
|
|
39
|
-
case 'errors':
|
|
40
|
-
test.errors = currentContent.filter(l => l.trim());
|
|
41
|
-
break;
|
|
42
|
-
case 'new-errors':
|
|
43
|
-
test.newErrors = currentContent.filter(l => l.trim());
|
|
44
|
-
break;
|
|
45
|
-
case 'document-fragment':
|
|
46
|
-
test.documentFragment = currentContent.join('\n');
|
|
47
|
-
break;
|
|
48
|
-
case 'document':
|
|
49
|
-
test.document = currentContent.join('\n');
|
|
50
|
-
break;
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
currentSection = line.substring(1);
|
|
56
|
-
currentContent = [];
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
if (currentSection === 'script-off') {
|
|
60
|
-
test.scriptOff = true;
|
|
61
|
-
} else if (currentSection === 'script-on') {
|
|
62
|
-
test.scriptOn = true;
|
|
63
|
-
}
|
|
64
|
-
} else {
|
|
65
|
-
currentContent.push(line);
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
if (currentSection) {
|
|
71
|
-
switch (currentSection) {
|
|
72
|
-
case 'data':
|
|
73
|
-
test.data = currentContent.join('\n');
|
|
74
|
-
break;
|
|
75
|
-
case 'errors':
|
|
76
|
-
test.errors = currentContent.filter(l => l.trim());
|
|
77
|
-
break;
|
|
78
|
-
case 'new-errors':
|
|
79
|
-
test.newErrors = currentContent.filter(l => l.trim());
|
|
80
|
-
break;
|
|
81
|
-
case 'document-fragment':
|
|
82
|
-
test.documentFragment = currentContent.join('\n');
|
|
83
|
-
break;
|
|
84
|
-
case 'document':
|
|
85
|
-
test.document = currentContent.join('\n');
|
|
86
|
-
break;
|
|
87
|
-
}
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
if (test.data && test.document) {
|
|
91
|
-
tests.push(test as HTML5libTreeTest);
|
|
92
|
-
}
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
return tests;
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
export function convertASTToHTML5libTree(node: ASTNode, depth: number = 0): string[] {
|
|
100
|
-
const lines: string[] = [];
|
|
101
|
-
const indent = '| ' + ' '.repeat(depth);
|
|
102
|
-
|
|
103
|
-
switch (node.type) {
|
|
104
|
-
case 'DOCUMENT':
|
|
105
|
-
|
|
106
|
-
break;
|
|
107
|
-
case 'DOCTYPE':
|
|
108
|
-
lines.push(`${indent}<!DOCTYPE ${node.tagName || 'html'}>`);
|
|
109
|
-
break;
|
|
110
|
-
case 'ELEMENT':
|
|
111
|
-
const tagName = node.tagName || 'unknown';
|
|
112
|
-
lines.push(`${indent}<${tagName}>`);
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
if (node.attributes) {
|
|
116
|
-
for (const [name, value] of Object.entries(node.attributes).sort()) {
|
|
117
|
-
lines.push(`${indent} ${name}="${value}"`);
|
|
118
|
-
}
|
|
119
|
-
}
|
|
120
|
-
break;
|
|
121
|
-
case 'TEXT':
|
|
122
|
-
if (node.content && node.content.trim()) {
|
|
123
|
-
lines.push(`${indent}"${node.content}"`);
|
|
124
|
-
}
|
|
125
|
-
break;
|
|
126
|
-
case 'COMMENT':
|
|
127
|
-
lines.push(`${indent}<!-- ${node.content || ''} -->`);
|
|
128
|
-
break;
|
|
129
|
-
case 'CDATA':
|
|
130
|
-
lines.push(`${indent}<![CDATA[${node.content || ''}]]>`);
|
|
131
|
-
break;
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
if (node.children) {
|
|
136
|
-
for (const child of node.children) {
|
|
137
|
-
lines.push(...convertASTToHTML5libTree(child, depth + 1));
|
|
138
|
-
}
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
return lines;
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
export function normalizeHTML5libTree(tree: string): string {
|
|
146
|
-
return tree
|
|
147
|
-
.split('\n')
|
|
148
|
-
.map(line => line.trim())
|
|
149
|
-
.filter(line => line.length > 0)
|
|
150
|
-
.join('\n');
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
export function runHTML5libTreeTest(test: HTML5libTreeTest, testName: string): void {
|
|
155
|
-
it(testName, () => {
|
|
156
|
-
const { data, document: expectedTree, documentFragment, scriptOff, scriptOn } = test;
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
const tokens = tokenize(data);
|
|
160
|
-
const ast = parse(tokens);
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
const actualTreeLines = convertASTToHTML5libTree(ast);
|
|
164
|
-
const actualTree = actualTreeLines.join('\n');
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
const normalizedActual = normalizeHTML5libTree(actualTree);
|
|
168
|
-
const normalizedExpected = normalizeHTML5libTree(expectedTree);
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
expect(normalizedActual).toBe(normalizedExpected);
|
|
172
|
-
});
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
export function runHTML5libTreeTestSuite(tests: HTML5libTreeTest[], suiteName: string): void {
|
|
177
|
-
describe(`HTML5lib Tree Construction Tests: ${suiteName}`, () => {
|
|
178
|
-
tests.forEach((test, index) => {
|
|
179
|
-
const testName = `Test ${index + 1}: ${test.data.substring(0, 50).replace(/\n/g, ' ')}...`;
|
|
180
|
-
runHTML5libTreeTest(test, testName);
|
|
181
|
-
});
|
|
182
|
-
});
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
export async function loadHTML5libTreeTests(testData: string, suiteName: string): Promise<void> {
|
|
187
|
-
const tests = parseHTML5libDATFile(testData);
|
|
188
|
-
runHTML5libTreeTestSuite(tests, suiteName);
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
export function validateHTML5libTreeTest(test: HTML5libTreeTest): boolean {
|
|
193
|
-
return !!(test.data && test.document && test.errors !== undefined);
|
|
194
|
-
}
|
|
@@ -1,250 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect } from "bun:test";
|
|
2
|
-
import {
|
|
3
|
-
loadHTML5libTreeTests,
|
|
4
|
-
runHTML5libTreeTestSuite,
|
|
5
|
-
parseHTML5libDATFile,
|
|
6
|
-
type HTML5libTreeTest,
|
|
7
|
-
} from "./tree-construction-utils";
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
const basicTreeTestData = `#data
|
|
11
|
-
Test
|
|
12
|
-
#errors
|
|
13
|
-
(1,0): expected-doctype-but-got-chars
|
|
14
|
-
#document
|
|
15
|
-
| <html>
|
|
16
|
-
| <head>
|
|
17
|
-
| <body>
|
|
18
|
-
| "Test"
|
|
19
|
-
|
|
20
|
-
#data
|
|
21
|
-
<p>One<p>Two
|
|
22
|
-
#errors
|
|
23
|
-
(1,3): expected-doctype-but-got-start-tag
|
|
24
|
-
#document
|
|
25
|
-
| <html>
|
|
26
|
-
| <head>
|
|
27
|
-
| <body>
|
|
28
|
-
| <p>
|
|
29
|
-
| "One"
|
|
30
|
-
| <p>
|
|
31
|
-
| "Two"
|
|
32
|
-
|
|
33
|
-
#data
|
|
34
|
-
<html>
|
|
35
|
-
#errors
|
|
36
|
-
(1,6): expected-doctype-but-got-start-tag
|
|
37
|
-
#document
|
|
38
|
-
| <html>
|
|
39
|
-
| <head>
|
|
40
|
-
| <body>
|
|
41
|
-
|
|
42
|
-
#data
|
|
43
|
-
<head>
|
|
44
|
-
#errors
|
|
45
|
-
(1,6): expected-doctype-but-got-start-tag
|
|
46
|
-
#document
|
|
47
|
-
| <html>
|
|
48
|
-
| <head>
|
|
49
|
-
| <body>
|
|
50
|
-
|
|
51
|
-
#data
|
|
52
|
-
<body>
|
|
53
|
-
#errors
|
|
54
|
-
(1,6): expected-doctype-but-got-start-tag
|
|
55
|
-
#document
|
|
56
|
-
| <html>
|
|
57
|
-
| <head>
|
|
58
|
-
| <body>
|
|
59
|
-
|
|
60
|
-
#data
|
|
61
|
-
<html><head></head><body></body>
|
|
62
|
-
#errors
|
|
63
|
-
(1,6): expected-doctype-but-got-start-tag
|
|
64
|
-
#document
|
|
65
|
-
| <html>
|
|
66
|
-
| <head>
|
|
67
|
-
| <body>
|
|
68
|
-
|
|
69
|
-
#data
|
|
70
|
-
Line1<br>Line2
|
|
71
|
-
#errors
|
|
72
|
-
(1,0): expected-doctype-but-got-chars
|
|
73
|
-
#document
|
|
74
|
-
| <html>
|
|
75
|
-
| <head>
|
|
76
|
-
| <body>
|
|
77
|
-
| "Line1"
|
|
78
|
-
| <br>
|
|
79
|
-
| "Line2"
|
|
80
|
-
|
|
81
|
-
#data
|
|
82
|
-
<div>hello</div>
|
|
83
|
-
#errors
|
|
84
|
-
(1,5): expected-doctype-but-got-start-tag
|
|
85
|
-
#document
|
|
86
|
-
| <html>
|
|
87
|
-
| <head>
|
|
88
|
-
| <body>
|
|
89
|
-
| <div>
|
|
90
|
-
| "hello"
|
|
91
|
-
|
|
92
|
-
#data
|
|
93
|
-
<p><b>bold</b></p>
|
|
94
|
-
#errors
|
|
95
|
-
(1,3): expected-doctype-but-got-start-tag
|
|
96
|
-
#document
|
|
97
|
-
| <html>
|
|
98
|
-
| <head>
|
|
99
|
-
| <body>
|
|
100
|
-
| <p>
|
|
101
|
-
| <b>
|
|
102
|
-
| "bold"
|
|
103
|
-
|
|
104
|
-
#data
|
|
105
|
-
<!--comment-->
|
|
106
|
-
#errors
|
|
107
|
-
(1,0): expected-doctype-but-got-chars
|
|
108
|
-
#document
|
|
109
|
-
| <html>
|
|
110
|
-
| <head>
|
|
111
|
-
| <body>
|
|
112
|
-
| <!-- comment -->`;
|
|
113
|
-
|
|
114
|
-
const doctypeTestData = `#data
|
|
115
|
-
<!DOCTYPE html>
|
|
116
|
-
#errors
|
|
117
|
-
#document
|
|
118
|
-
| <!DOCTYPE html>
|
|
119
|
-
| <html>
|
|
120
|
-
| <head>
|
|
121
|
-
| <body>
|
|
122
|
-
|
|
123
|
-
#data
|
|
124
|
-
<!DOCTYPE html><html><head><title>Test</title></head><body><p>Hello</p></body></html>
|
|
125
|
-
#errors
|
|
126
|
-
#document
|
|
127
|
-
| <!DOCTYPE html>
|
|
128
|
-
| <html>
|
|
129
|
-
| <head>
|
|
130
|
-
| <title>
|
|
131
|
-
| "Test"
|
|
132
|
-
| <body>
|
|
133
|
-
| <p>
|
|
134
|
-
| "Hello"
|
|
135
|
-
|
|
136
|
-
#data
|
|
137
|
-
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
|
|
138
|
-
#errors
|
|
139
|
-
#document
|
|
140
|
-
| <!DOCTYPE html>
|
|
141
|
-
| <html>
|
|
142
|
-
| <head>
|
|
143
|
-
| <body>
|
|
144
|
-
|
|
145
|
-
#data
|
|
146
|
-
<!DOCTYPE html SYSTEM "about:legacy-compat">
|
|
147
|
-
#errors
|
|
148
|
-
#document
|
|
149
|
-
| <!DOCTYPE html>
|
|
150
|
-
| <html>
|
|
151
|
-
| <head>
|
|
152
|
-
| <body>`;
|
|
153
|
-
|
|
154
|
-
const errorHandlingTestData = `#data
|
|
155
|
-
<b><table><td></b><i></table>
|
|
156
|
-
#errors
|
|
157
|
-
(1,3): expected-doctype-but-got-start-tag
|
|
158
|
-
(1,14): unexpected-cell-in-table-body
|
|
159
|
-
(1,18): unexpected-end-tag
|
|
160
|
-
(1,29): unexpected-cell-end-tag
|
|
161
|
-
(1,29): expected-closing-tag-but-got-eof
|
|
162
|
-
#document
|
|
163
|
-
| <html>
|
|
164
|
-
| <head>
|
|
165
|
-
| <body>
|
|
166
|
-
| <b>
|
|
167
|
-
| <table>
|
|
168
|
-
| <tbody>
|
|
169
|
-
| <tr>
|
|
170
|
-
| <td>
|
|
171
|
-
| <i>
|
|
172
|
-
|
|
173
|
-
#data
|
|
174
|
-
<p><b><div><marquee></p></b></div>
|
|
175
|
-
#errors
|
|
176
|
-
(1,3): expected-doctype-but-got-start-tag
|
|
177
|
-
(1,11): unexpected-end-tag
|
|
178
|
-
(1,24): unexpected-end-tag
|
|
179
|
-
(1,28): unexpected-end-tag
|
|
180
|
-
(1,34): end-tag-too-early
|
|
181
|
-
(1,34): expected-closing-tag-but-got-eof
|
|
182
|
-
#document
|
|
183
|
-
| <html>
|
|
184
|
-
| <head>
|
|
185
|
-
| <body>
|
|
186
|
-
| <p>
|
|
187
|
-
| <b>
|
|
188
|
-
| <div>
|
|
189
|
-
| <b>
|
|
190
|
-
| <marquee>
|
|
191
|
-
| <p>
|
|
192
|
-
|
|
193
|
-
#data
|
|
194
|
-
<a><p><a></a></p></a>
|
|
195
|
-
#errors
|
|
196
|
-
(1,3): expected-doctype-but-got-start-tag
|
|
197
|
-
(1,9): unexpected-start-tag-implies-end-tag
|
|
198
|
-
(1,9): adoption-agency-1.3
|
|
199
|
-
(1,21): unexpected-end-tag
|
|
200
|
-
#document
|
|
201
|
-
| <html>
|
|
202
|
-
| <head>
|
|
203
|
-
| <body>
|
|
204
|
-
| <a>
|
|
205
|
-
| <p>
|
|
206
|
-
| <a>
|
|
207
|
-
| <a>`;
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
describe("HTML5lib Tree Construction Tests", () => {
|
|
211
|
-
it("should parse DAT format correctly", () => {
|
|
212
|
-
const tests = parseHTML5libDATFile(basicTreeTestData);
|
|
213
|
-
expect(tests.length).toBeGreaterThan(0);
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
const firstTest = tests[0];
|
|
217
|
-
if (firstTest) {
|
|
218
|
-
expect(firstTest.data).toBe("Test");
|
|
219
|
-
expect(firstTest.errors.length).toBeGreaterThan(0);
|
|
220
|
-
expect(firstTest.document).toContain("<html>");
|
|
221
|
-
}
|
|
222
|
-
});
|
|
223
|
-
|
|
224
|
-
it("should handle doctype tests", () => {
|
|
225
|
-
const tests = parseHTML5libDATFile(doctypeTestData);
|
|
226
|
-
expect(tests.length).toBeGreaterThan(0);
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
const firstTest = tests[0];
|
|
230
|
-
if (firstTest) {
|
|
231
|
-
expect(firstTest.data).toBe("<!DOCTYPE html>");
|
|
232
|
-
expect(firstTest.errors.length).toBe(0);
|
|
233
|
-
expect(firstTest.document).toContain("<!DOCTYPE html>");
|
|
234
|
-
}
|
|
235
|
-
});
|
|
236
|
-
|
|
237
|
-
it("should handle error cases", () => {
|
|
238
|
-
const tests = parseHTML5libDATFile(errorHandlingTestData);
|
|
239
|
-
expect(tests.length).toBeGreaterThan(0);
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
const firstTest = tests[0];
|
|
243
|
-
if (firstTest) {
|
|
244
|
-
expect(firstTest.errors.length).toBeGreaterThan(0);
|
|
245
|
-
expect(firstTest.errors[0]).toContain(
|
|
246
|
-
"expected-doctype-but-got-start-tag"
|
|
247
|
-
);
|
|
248
|
-
}
|
|
249
|
-
});
|
|
250
|
-
});
|