@tkeron/html-parser 0.1.7 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -7
- package/bun.lock +5 -0
- package/index.ts +4 -0
- package/package.json +7 -1
- package/src/css-selector.ts +1 -1
- package/src/dom-simulator.ts +38 -16
- package/src/encoding.ts +39 -0
- package/src/index.ts +9 -0
- package/src/parser.ts +478 -144
- package/src/serializer.ts +450 -0
- package/src/tokenizer.ts +59 -43
- package/tests/advanced.test.ts +119 -106
- package/tests/custom-elements.test.ts +172 -162
- package/tests/dom-extended.test.ts +12 -12
- package/tests/dom-manipulation.test.ts +9 -10
- package/tests/dom.test.ts +32 -27
- package/tests/helpers/tokenizer-adapter.test.ts +70 -0
- package/tests/helpers/tokenizer-adapter.ts +65 -0
- package/tests/helpers/tree-adapter.test.ts +39 -0
- package/tests/helpers/tree-adapter.ts +43 -0
- package/tests/html5lib-data/tokenizer/namedEntities.test +42422 -0
- package/tests/html5lib-data/tokenizer/pendingSpecChanges.test +9 -0
- package/tests/html5lib-data/tree-construction/adoption01.dat +354 -0
- package/tests/html5lib-data/tree-construction/adoption02.dat +39 -0
- package/tests/html5lib-data/tree-construction/domjs-unsafe.dat +0 -0
- package/tests/html5lib-data/tree-construction/entities02.dat +309 -0
- package/tests/html5lib-data/tree-construction/html5test-com.dat +301 -0
- package/tests/html5lib-data/tree-construction/math.dat +104 -0
- package/tests/html5lib-data/tree-construction/namespace-sensitivity.dat +22 -0
- package/tests/html5lib-data/tree-construction/noscript01.dat +237 -0
- package/tests/html5lib-data/tree-construction/ruby.dat +302 -0
- package/tests/html5lib-data/tree-construction/scriptdata01.dat +372 -0
- package/tests/html5lib-data/tree-construction/svg.dat +104 -0
- package/tests/html5lib-data/tree-construction/template.dat +1673 -0
- package/tests/html5lib-data/tree-construction/tests10.dat +853 -0
- package/tests/html5lib-data/tree-construction/tests11.dat +523 -0
- package/tests/html5lib-data/tree-construction/tests20.dat +842 -0
- package/tests/html5lib-data/tree-construction/tests21.dat +306 -0
- package/tests/html5lib-data/tree-construction/tests23.dat +168 -0
- package/tests/html5lib-data/tree-construction/tests24.dat +79 -0
- package/tests/html5lib-data/tree-construction/tests5.dat +210 -0
- package/tests/html5lib-data/tree-construction/tests6.dat +663 -0
- package/tests/html5lib-data/tree-construction/tests_innerHTML_1.dat +844 -0
- package/tests/parser.test.ts +172 -193
- package/tests/serializer-core.test.ts +16 -0
- package/tests/serializer-data/core.test +125 -0
- package/tests/serializer-data/injectmeta.test +66 -0
- package/tests/serializer-data/optionaltags.test +965 -0
- package/tests/serializer-data/options.test +60 -0
- package/tests/serializer-data/whitespace.test +51 -0
- package/tests/serializer-injectmeta.test.ts +16 -0
- package/tests/serializer-optionaltags.test.ts +16 -0
- package/tests/serializer-options.test.ts +16 -0
- package/tests/serializer-whitespace.test.ts +16 -0
- package/tests/tokenizer-namedEntities.test.ts +20 -0
- package/tests/tokenizer-pendingSpecChanges.test.ts +20 -0
- package/tests/tokenizer.test.ts +3 -6
- package/tests/tree-construction-adoption01.test.ts +37 -0
- package/tests/tree-construction-adoption02.test.ts +34 -0
- package/tests/tree-construction-domjs-unsafe.test.ts +24 -0
- package/tests/tree-construction-entities02.test.ts +33 -0
- package/tests/tree-construction-html5test-com.test.ts +24 -0
- package/tests/tree-construction-math.test.ts +18 -0
- package/tests/tree-construction-namespace-sensitivity.test.ts +18 -0
- package/tests/tree-construction-noscript01.test.ts +18 -0
- package/tests/tree-construction-ruby.test.ts +21 -0
- package/tests/tree-construction-scriptdata01.test.ts +21 -0
- package/tests/tree-construction-svg.test.ts +21 -0
- package/tests/tree-construction-template.test.ts +21 -0
- package/tests/tree-construction-tests10.test.ts +21 -0
- package/tests/tree-construction-tests11.test.ts +21 -0
- package/tests/tree-construction-tests20.test.ts +18 -0
- package/tests/tree-construction-tests21.test.ts +18 -0
- package/tests/tree-construction-tests23.test.ts +18 -0
- package/tests/tree-construction-tests24.test.ts +18 -0
- package/tests/tree-construction-tests5.test.ts +21 -0
- package/tests/tree-construction-tests6.test.ts +21 -0
- package/tests/tree-construction-tests_innerHTML_1.test.ts +21 -0
- package/tests/official/README.md +0 -87
- package/tests/official/acid/acid-tests.test.ts +0 -309
- package/tests/official/final-output/final-output.test.ts +0 -361
- package/tests/official/html5lib/tokenizer-utils.ts +0 -192
- package/tests/official/html5lib/tokenizer.test.ts +0 -171
- package/tests/official/html5lib/tree-construction-utils.ts +0 -194
- package/tests/official/html5lib/tree-construction.test.ts +0 -250
- package/tests/official/validator/validator-tests.test.ts +0 -237
- package/tests/official/validator-nu/validator-nu.test.ts +0 -335
- package/tests/official/whatwg/whatwg-tests.test.ts +0 -205
- package/tests/official/wpt/wpt-tests.test.ts +0 -409
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { expect, it, describe } from 'bun:test';
|
|
2
|
+
import { serializeTokens } from '../src/serializer';
|
|
3
|
+
import { readFileSync } from 'fs';
|
|
4
|
+
|
|
5
|
+
/**
 * One case from the serializer fixture file.
 *
 * `expected` is a list: sibling fixtures carry more than one entry for a
 * single case, so every entry is treated as an acceptable serialization.
 * NOTE(review): `options` and `xhtml` keys also appear in the fixtures but
 * are not consumed here — confirm whether `serializeTokens` should receive them.
 */
interface SerializerCase {
  description: string;
  // Derived from the function under test so the fixture type cannot drift from it.
  input: Parameters<typeof serializeTokens>[0];
  expected: string[];
}

/**
 * Registers one test per entry of `tests/serializer-data/core.test`.
 * Each test serializes the case's token list and passes when the output
 * matches any of the case's acceptable serializations.
 */
describe('Serializer Core Tests', () => {
  // Path is resolved against the working directory, so run from the package root.
  const fixtureText = readFileSync('tests/serializer-data/core.test', 'utf8');
  const cases: SerializerCase[] = JSON.parse(fixtureText).tests;

  for (const testCase of cases) {
    it(testCase.description, () => {
      const serialized = serializeTokens(testCase.input);
      // Accept any listed alternative instead of only the first one.
      expect(testCase.expected).toContain(serialized);
    });
  }
});
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
{"tests": [
|
|
2
|
+
|
|
3
|
+
{"description": "proper attribute value escaping",
|
|
4
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test \"with\" &quot;"}]]],
|
|
5
|
+
"expected": ["<span title='test \"with\" &amp;quot;'>"]
|
|
6
|
+
},
|
|
7
|
+
|
|
8
|
+
{"description": "proper attribute value non-quoting",
|
|
9
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo"}]]],
|
|
10
|
+
"expected": ["<span title=foo>"],
|
|
11
|
+
"xhtml": ["<span title=\"foo\">"]
|
|
12
|
+
},
|
|
13
|
+
|
|
14
|
+
{"description": "proper attribute value non-quoting (with <)",
|
|
15
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo<bar"}]]],
|
|
16
|
+
"expected": ["<span title=foo<bar>"],
|
|
17
|
+
"xhtml": ["<span title=\"foo&lt;bar\">"]
|
|
18
|
+
},
|
|
19
|
+
|
|
20
|
+
{"description": "proper attribute value quoting (with =)",
|
|
21
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo=bar"}]]],
|
|
22
|
+
"expected": ["<span title=\"foo=bar\">"]
|
|
23
|
+
},
|
|
24
|
+
|
|
25
|
+
{"description": "proper attribute value quoting (with >)",
|
|
26
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo>bar"}]]],
|
|
27
|
+
"expected": ["<span title=\"foo>bar\">"]
|
|
28
|
+
},
|
|
29
|
+
|
|
30
|
+
{"description": "proper attribute value quoting (with \")",
|
|
31
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\"bar"}]]],
|
|
32
|
+
"expected": ["<span title='foo\"bar'>"]
|
|
33
|
+
},
|
|
34
|
+
|
|
35
|
+
{"description": "proper attribute value quoting (with ')",
|
|
36
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar"}]]],
|
|
37
|
+
"expected": ["<span title=\"foo'bar\">"]
|
|
38
|
+
},
|
|
39
|
+
|
|
40
|
+
{"description": "proper attribute value quoting (with both \" and ')",
|
|
41
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar\"baz"}]]],
|
|
42
|
+
"expected": ["<span title=\"foo'bar&quot;baz\">"]
|
|
43
|
+
},
|
|
44
|
+
|
|
45
|
+
{"description": "proper attribute value quoting (with space)",
|
|
46
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo bar"}]]],
|
|
47
|
+
"expected": ["<span title=\"foo bar\">"]
|
|
48
|
+
},
|
|
49
|
+
|
|
50
|
+
{"description": "proper attribute value quoting (with tab)",
|
|
51
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\tbar"}]]],
|
|
52
|
+
"expected": ["<span title=\"foo\tbar\">"]
|
|
53
|
+
},
|
|
54
|
+
|
|
55
|
+
{"description": "proper attribute value quoting (with LF)",
|
|
56
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\nbar"}]]],
|
|
57
|
+
"expected": ["<span title=\"foo\nbar\">"]
|
|
58
|
+
},
|
|
59
|
+
|
|
60
|
+
{"description": "proper attribute value quoting (with CR)",
|
|
61
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\rbar"}]]],
|
|
62
|
+
"expected": ["<span title=\"foo\rbar\">"]
|
|
63
|
+
},
|
|
64
|
+
|
|
65
|
+
{"description": "proper attribute value non-quoting (with linetab)",
|
|
66
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Bbar"}]]],
|
|
67
|
+
"expected": ["<span title=foo\u000Bbar>"],
|
|
68
|
+
"xhtml": ["<span title=\"foo\u000Bbar\">"]
|
|
69
|
+
},
|
|
70
|
+
|
|
71
|
+
{"description": "proper attribute value quoting (with form feed)",
|
|
72
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Cbar"}]]],
|
|
73
|
+
"expected": ["<span title=\"foo\u000Cbar\">"]
|
|
74
|
+
},
|
|
75
|
+
|
|
76
|
+
{"description": "void element (as EmptyTag token)",
|
|
77
|
+
"input": [["EmptyTag", "img", {}]],
|
|
78
|
+
"expected": ["<img>"],
|
|
79
|
+
"xhtml": ["<img />"]
|
|
80
|
+
},
|
|
81
|
+
|
|
82
|
+
{"description": "void element (as StartTag token)",
|
|
83
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "img", {}]],
|
|
84
|
+
"expected": ["<img>"],
|
|
85
|
+
"xhtml": ["<img />"]
|
|
86
|
+
},
|
|
87
|
+
|
|
88
|
+
{"description": "doctype in error",
|
|
89
|
+
"input": [["Doctype", "foo"]],
|
|
90
|
+
"expected": ["<!DOCTYPE foo>"]
|
|
91
|
+
},
|
|
92
|
+
|
|
93
|
+
{"description": "character data",
|
|
94
|
+
"options": {"encoding":"utf-8"},
|
|
95
|
+
"input": [["Characters", "a<b>c&d"]],
|
|
96
|
+
"expected": ["a&lt;b&gt;c&amp;d"]
|
|
97
|
+
},
|
|
98
|
+
|
|
99
|
+
{"description": "rcdata",
|
|
100
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
|
|
101
|
+
"expected": ["<script>a<b>c&d"],
|
|
102
|
+
"xhtml": ["<script>a&lt;b&gt;c&amp;d"]
|
|
103
|
+
},
|
|
104
|
+
|
|
105
|
+
{"description": "doctype",
|
|
106
|
+
"input": [["Doctype", "HTML"]],
|
|
107
|
+
"expected": ["<!DOCTYPE HTML>"]
|
|
108
|
+
},
|
|
109
|
+
|
|
110
|
+
{"description": "HTML 4.01 DOCTYPE",
|
|
111
|
+
"input": [["Doctype", "HTML", "-//W3C//DTD HTML 4.01//EN", "http://www.w3.org/TR/html4/strict.dtd"]],
|
|
112
|
+
"expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"]
|
|
113
|
+
},
|
|
114
|
+
|
|
115
|
+
{"description": "HTML 4.01 DOCTYPE without system identifier",
|
|
116
|
+
"input": [["Doctype", "HTML", "-//W3C//DTD HTML 4.01//EN"]],
|
|
117
|
+
"expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">"]
|
|
118
|
+
},
|
|
119
|
+
|
|
120
|
+
{"description": "IBM DOCTYPE without public identifier",
|
|
121
|
+
"input": [["Doctype", "html", "", "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"]],
|
|
122
|
+
"expected": ["<!DOCTYPE html SYSTEM \"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd\">"]
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
]}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
{"tests": [
|
|
2
|
+
|
|
3
|
+
{"description": "no encoding",
|
|
4
|
+
"options": {"inject_meta_charset": true},
|
|
5
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
|
6
|
+
"expected": [""],
|
|
7
|
+
"xhtml": ["<head></head>"]
|
|
8
|
+
},
|
|
9
|
+
|
|
10
|
+
{"description": "empytag head",
|
|
11
|
+
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
|
12
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
|
13
|
+
"expected": ["<meta charset=utf-8>"],
|
|
14
|
+
"xhtml": ["<head><meta charset=\"utf-8\" /></head>"]
|
|
15
|
+
},
|
|
16
|
+
|
|
17
|
+
{"description": "head w/title",
|
|
18
|
+
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
|
19
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["StartTag", "http://www.w3.org/1999/xhtml","title",{}], ["Characters", "foo"],["EndTag", "http://www.w3.org/1999/xhtml", "title"], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
|
20
|
+
"expected": ["<meta charset=utf-8><title>foo</title>"],
|
|
21
|
+
"xhtml": ["<head><meta charset=\"utf-8\" /><title>foo</title></head>"]
|
|
22
|
+
},
|
|
23
|
+
|
|
24
|
+
{"description": "head w/meta-charset",
|
|
25
|
+
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
|
26
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
|
27
|
+
"expected": ["<meta charset=utf-8>"],
|
|
28
|
+
"xhtml": ["<head><meta charset=\"utf-8\" /></head>"]
|
|
29
|
+
},
|
|
30
|
+
|
|
31
|
+
{"description": "head w/ two meta-charset",
|
|
32
|
+
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
|
33
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
|
34
|
+
"expected": ["<meta charset=utf-8><meta charset=utf-8>", "<head><meta charset=utf-8><meta charset=ascii>"],
|
|
35
|
+
"xhtml": ["<head><meta charset=\"utf-8\" /><meta charset=\"utf-8\" /></head>", "<head><meta charset=\"utf-8\" /><meta charset=\"ascii\" /></head>"]
|
|
36
|
+
},
|
|
37
|
+
|
|
38
|
+
{"description": "head w/robots",
|
|
39
|
+
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
|
40
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
|
41
|
+
"expected": ["<meta charset=utf-8><meta content=noindex name=robots>"],
|
|
42
|
+
"xhtml": ["<head><meta charset=\"utf-8\" /><meta content=\"noindex\" name=\"robots\" /></head>"]
|
|
43
|
+
},
|
|
44
|
+
|
|
45
|
+
{"description": "head w/robots & charset",
|
|
46
|
+
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
|
47
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
|
48
|
+
"expected": ["<meta content=noindex name=robots><meta charset=utf-8>"],
|
|
49
|
+
"xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta charset=\"utf-8\" /></head>"]
|
|
50
|
+
},
|
|
51
|
+
|
|
52
|
+
{"description": "head w/ charset in http-equiv content-type",
|
|
53
|
+
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
|
54
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
|
55
|
+
"expected": ["<meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
|
|
56
|
+
"xhtml": ["<head><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
|
|
57
|
+
},
|
|
58
|
+
|
|
59
|
+
{"description": "head w/robots & charset in http-equiv content-type",
|
|
60
|
+
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
|
61
|
+
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
|
62
|
+
"expected": ["<meta content=noindex name=robots><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
|
|
63
|
+
"xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
]}
|