html5 0.1.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +9 -2
- data/Manifest.txt +61 -2
- data/README +41 -5
- data/Rakefile.rb +22 -6
- data/{parse.rb → bin/html5} +11 -11
- data/lib/core_ext/string.rb +17 -0
- data/lib/html5/constants.rb +228 -0
- data/lib/html5/filters/iso639codes.rb +752 -0
- data/lib/html5/filters/rfc2046.rb +30 -0
- data/lib/html5/filters/rfc3987.rb +89 -0
- data/lib/html5/filters/validator.rb +830 -0
- data/lib/html5/html5parser.rb +25 -25
- data/lib/html5/html5parser/after_body_phase.rb +3 -3
- data/lib/html5/html5parser/after_frameset_phase.rb +3 -4
- data/lib/html5/html5parser/after_head_phase.rb +6 -6
- data/lib/html5/html5parser/before_head_phase.rb +1 -1
- data/lib/html5/html5parser/in_body_phase.rb +54 -48
- data/lib/html5/html5parser/in_caption_phase.rb +7 -6
- data/lib/html5/html5parser/in_cell_phase.rb +3 -3
- data/lib/html5/html5parser/in_column_group_phase.rb +1 -1
- data/lib/html5/html5parser/in_frameset_phase.rb +5 -5
- data/lib/html5/html5parser/in_head_phase.rb +10 -10
- data/lib/html5/html5parser/in_row_phase.rb +4 -2
- data/lib/html5/html5parser/in_select_phase.rb +7 -6
- data/lib/html5/html5parser/in_table_body_phase.rb +8 -5
- data/lib/html5/html5parser/in_table_phase.rb +12 -7
- data/lib/html5/html5parser/initial_phase.rb +5 -6
- data/lib/html5/html5parser/phase.rb +5 -9
- data/lib/html5/html5parser/root_element_phase.rb +1 -2
- data/lib/html5/html5parser/trailing_end_phase.rb +3 -3
- data/lib/html5/inputstream.rb +25 -31
- data/lib/html5/liberalxmlparser.rb +2 -2
- data/lib/html5/sanitizer.rb +6 -6
- data/lib/html5/serializer/htmlserializer.rb +2 -3
- data/lib/html5/sniffer.rb +45 -0
- data/lib/html5/tokenizer.rb +57 -59
- data/lib/html5/treebuilders/rexml.rb +7 -6
- data/lib/html5/treebuilders/simpletree.rb +1 -1
- data/lib/html5/treewalkers/base.rb +8 -0
- data/lib/html5/version.rb +3 -0
- data/testdata/encoding/chardet/test_big5.txt +51 -0
- data/testdata/encoding/test-yahoo-jp.dat +10 -0
- data/testdata/encoding/tests1.dat +394 -0
- data/testdata/encoding/tests2.dat +81 -0
- data/testdata/sanitizer/tests1.dat +416 -0
- data/testdata/serializer/core.test +104 -0
- data/testdata/serializer/injectmeta.test +65 -0
- data/testdata/serializer/optionaltags.test +900 -0
- data/testdata/serializer/options.test +60 -0
- data/testdata/serializer/whitespace.test +51 -0
- data/testdata/sites/google-results.htm +1 -0
- data/testdata/sites/python-ref-import.htm +1 -0
- data/testdata/sites/web-apps-old.htm +1 -0
- data/testdata/sites/web-apps.htm +34275 -0
- data/testdata/sniffer/htmlOrFeed.json +43 -0
- data/testdata/tokenizer/contentModelFlags.test +48 -0
- data/testdata/tokenizer/entities.test +2339 -0
- data/testdata/tokenizer/escapeFlag.test +21 -0
- data/testdata/tokenizer/test1.test +172 -0
- data/testdata/tokenizer/test2.test +129 -0
- data/testdata/tokenizer/test3.test +367 -0
- data/testdata/tokenizer/test4.test +198 -0
- data/testdata/tree-construction/tests1.dat +1950 -0
- data/testdata/tree-construction/tests2.dat +773 -0
- data/testdata/tree-construction/tests3.dat +270 -0
- data/testdata/tree-construction/tests4.dat +60 -0
- data/testdata/tree-construction/tests5.dat +175 -0
- data/testdata/tree-construction/tests6.dat +196 -0
- data/testdata/validator/attributes.test +1035 -0
- data/testdata/validator/base-href-attribute.test +787 -0
- data/testdata/validator/base-target-attribute.test +35 -0
- data/testdata/validator/blockquote-cite-attribute.test +7 -0
- data/testdata/validator/classattribute.test +152 -0
- data/testdata/validator/contenteditableattribute.test +59 -0
- data/testdata/validator/contextmenuattribute.test +115 -0
- data/testdata/validator/dirattribute.test +59 -0
- data/testdata/validator/draggableattribute.test +63 -0
- data/testdata/validator/html-xmlns-attribute.test +23 -0
- data/testdata/validator/idattribute.test +115 -0
- data/testdata/validator/inputattributes.test +2795 -0
- data/testdata/validator/irrelevantattribute.test +63 -0
- data/testdata/validator/langattribute.test +5579 -0
- data/testdata/validator/li-value-attribute.test +7 -0
- data/testdata/validator/link-href-attribute.test +7 -0
- data/testdata/validator/link-hreflang-attribute.test +7 -0
- data/testdata/validator/link-rel-attribute.test +271 -0
- data/testdata/validator/ol-start-attribute.test +7 -0
- data/testdata/validator/starttags.test +375 -0
- data/testdata/validator/style-scoped-attribute.test +7 -0
- data/testdata/validator/tabindexattribute.test +79 -0
- data/tests/preamble.rb +7 -17
- data/tests/test_encoding.rb +1 -1
- data/tests/test_lxp.rb +16 -0
- data/tests/test_parser.rb +2 -2
- data/tests/test_sniffer.rb +27 -0
- data/tests/test_treewalkers.rb +41 -22
- data/tests/test_validator.rb +31 -0
- metadata +65 -6
@@ -0,0 +1,21 @@
|
|
1
|
+
{"tests": [
|
2
|
+
|
3
|
+
{"description":"Commented close tag in [R]CDATA",
|
4
|
+
"contentModelFlags":["RCDATA", "CDATA"],
|
5
|
+
"lastStartTag":"bar",
|
6
|
+
"input":"foo<!--</bar>--></bar>",
|
7
|
+
"output":[["Character", "foo<!--</bar>-->"], ["EndTag", "bar"]]},
|
8
|
+
|
9
|
+
{"description":"Bogus comment in [R]CDATA",
|
10
|
+
"contentModelFlags":["RCDATA", "CDATA"],
|
11
|
+
"lastStartTag":"bar",
|
12
|
+
"input":"foo<!-->baz</bar>",
|
13
|
+
"output":[["Character", "foo<!-->baz"], ["EndTag", "bar"]]},
|
14
|
+
|
15
|
+
{"description":"End tag surrounded by bogus comment in [R]CDATA",
|
16
|
+
"contentModelFlags":["RCDATA", "CDATA"],
|
17
|
+
"lastStartTag":"bar",
|
18
|
+
"input":"foo<!--></bar><!-->baz</bar>",
|
19
|
+
"output":[["Character", "foo<!-->"], ["EndTag", "bar"], "ParseError", ["Comment", ""], ["Character", "baz"], ["EndTag", "bar"]]}
|
20
|
+
|
21
|
+
]}
|
@@ -0,0 +1,172 @@
|
|
1
|
+
{"tests": [
|
2
|
+
|
3
|
+
{"description":"Correct Doctype lowercase",
|
4
|
+
"input":"<!DOCTYPE html>",
|
5
|
+
"output":[["DOCTYPE", "html", null, null, true]]},
|
6
|
+
|
7
|
+
{"description":"Correct Doctype uppercase",
|
8
|
+
"input":"<!DOCTYPE HTML>",
|
9
|
+
"output":[["DOCTYPE", "HTML", null, null, true]]},
|
10
|
+
|
11
|
+
{"description":"Correct Doctype mixed case",
|
12
|
+
"input":"<!DOCTYPE HtMl>",
|
13
|
+
"output":[["DOCTYPE", "HtMl", null, null, true]]},
|
14
|
+
|
15
|
+
{"description":"Truncated doctype start",
|
16
|
+
"input":"<!DOC>",
|
17
|
+
"output":["ParseError", ["Comment", "DOC"]]},
|
18
|
+
|
19
|
+
{"description":"Doctype in error",
|
20
|
+
"input":"<!DOCTYPE foo>",
|
21
|
+
"output":[["DOCTYPE", "foo", null, null, true]]},
|
22
|
+
|
23
|
+
{"description":"Single Start Tag",
|
24
|
+
"input":"<h>",
|
25
|
+
"output":[["StartTag", "h", {}]]},
|
26
|
+
|
27
|
+
{"description":"Empty end tag",
|
28
|
+
"input":"</>",
|
29
|
+
"output":["ParseError"]},
|
30
|
+
|
31
|
+
{"description":"Empty start tag",
|
32
|
+
"input":"<>",
|
33
|
+
"output":["ParseError", ["Character", "<>"]]},
|
34
|
+
|
35
|
+
{"description":"Start Tag w/attribute",
|
36
|
+
"input":"<h a='b'>",
|
37
|
+
"output":[["StartTag", "h", {"a":"b"}]]},
|
38
|
+
|
39
|
+
{"description":"Start Tag w/attribute no quotes",
|
40
|
+
"input":"<h a=b>",
|
41
|
+
"output":[["StartTag", "h", {"a":"b"}]]},
|
42
|
+
|
43
|
+
{"description":"Start/End Tag",
|
44
|
+
"input":"<h></h>",
|
45
|
+
"output":[["StartTag", "h", {}], ["EndTag", "h"]]},
|
46
|
+
|
47
|
+
{"description":"Two unclosed start tags",
|
48
|
+
"input":"<p>One<p>Two",
|
49
|
+
"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
|
50
|
+
|
51
|
+
{"description":"End Tag w/attribute",
|
52
|
+
"input":"<h></h a='b'>",
|
53
|
+
"output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]},
|
54
|
+
|
55
|
+
{"description":"Multiple atts",
|
56
|
+
"input":"<h a='b' c='d'>",
|
57
|
+
"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
|
58
|
+
|
59
|
+
{"description":"Multiple atts no space",
|
60
|
+
"input":"<h a='b'c='d'>",
|
61
|
+
"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
|
62
|
+
|
63
|
+
{"description":"Repeated attr",
|
64
|
+
"input":"<h a='b' a='d'>",
|
65
|
+
"output":["ParseError", ["StartTag", "h", {"a":"b"}]]},
|
66
|
+
|
67
|
+
{"description":"Simple comment",
|
68
|
+
"input":"<!--comment-->",
|
69
|
+
"output":[["Comment", "comment"]]},
|
70
|
+
|
71
|
+
{"description":"Comment, Central dash no space",
|
72
|
+
"input":"<!----->",
|
73
|
+
"output":["ParseError", ["Comment", "-"]]},
|
74
|
+
|
75
|
+
{"description":"Comment, two central dashes",
|
76
|
+
"input":"<!-- --comment -->",
|
77
|
+
"output":["ParseError", ["Comment", " --comment "]]},
|
78
|
+
|
79
|
+
{"description":"Unfinished comment",
|
80
|
+
"input":"<!--comment",
|
81
|
+
"output":["ParseError", ["Comment", "comment"]]},
|
82
|
+
|
83
|
+
{"description":"Start of a comment",
|
84
|
+
"input":"<!-",
|
85
|
+
"output":["ParseError", ["Comment", "-"]]},
|
86
|
+
|
87
|
+
{"description":"Short comment",
|
88
|
+
"input":"<!-->",
|
89
|
+
"output":["ParseError", ["Comment", ""]]},
|
90
|
+
|
91
|
+
{"description":"Short comment two",
|
92
|
+
"input":"<!--->",
|
93
|
+
"output":["ParseError", ["Comment", ""]]},
|
94
|
+
|
95
|
+
{"description":"Short comment three",
|
96
|
+
"input":"<!---->",
|
97
|
+
"output":[["Comment", ""]]},
|
98
|
+
|
99
|
+
|
100
|
+
{"description":"Ampersand EOF",
|
101
|
+
"input":"&",
|
102
|
+
"output":[["Character", "&"]]},
|
103
|
+
|
104
|
+
{"description":"Ampersand ampersand EOF",
|
105
|
+
"input":"&&",
|
106
|
+
"output":[["Character", "&&"]]},
|
107
|
+
|
108
|
+
{"description":"Ampersand space EOF",
|
109
|
+
"input":"& ",
|
110
|
+
"output":[["Character", "& "]]},
|
111
|
+
|
112
|
+
{"description":"Unfinished entity",
|
113
|
+
"input":"&f",
|
114
|
+
"output":["ParseError", ["Character", "&f"]]},
|
115
|
+
|
116
|
+
{"description":"Ampersand, number sign",
|
117
|
+
"input":"&#",
|
118
|
+
"output":["ParseError", ["Character", "&#"]]},
|
119
|
+
|
120
|
+
{"description":"Unfinished numeric entity",
|
121
|
+
"input":"&#x",
|
122
|
+
"output":["ParseError", ["Character", "&#x"]]},
|
123
|
+
|
124
|
+
{"description":"Entity with trailing semicolon (1)",
|
125
|
+
"input":"I'm &not;it",
|
126
|
+
"output":[["Character","I'm ¬it"]]},
|
127
|
+
|
128
|
+
{"description":"Entity with trailing semicolon (2)",
|
129
|
+
"input":"I'm &notin;",
|
130
|
+
"output":[["Character","I'm ∉"]]},
|
131
|
+
|
132
|
+
{"description":"Entity without trailing semicolon (1)",
|
133
|
+
"input":"I'm &notit",
|
134
|
+
"output":[["Character","I'm "], "ParseError", ["Character", "¬it"]]},
|
135
|
+
|
136
|
+
{"description":"Entity without trailing semicolon (2)",
|
137
|
+
"input":"I'm &notin",
|
138
|
+
"output":[["Character","I'm "], "ParseError", ["Character", "¬in"]]},
|
139
|
+
|
140
|
+
{"description":"Partial entity match at end of file",
|
141
|
+
"input":"I'm &no",
|
142
|
+
"output":[["Character","I'm "], "ParseError", ["Character", "&no"]]},
|
143
|
+
|
144
|
+
{"description":"ASCII decimal entity",
|
145
|
+
"input":"&#0036;",
|
146
|
+
"output":[["Character","$"]]},
|
147
|
+
|
148
|
+
{"description":"ASCII hexadecimal entity",
|
149
|
+
"input":"&#x3f;",
|
150
|
+
"output":[["Character","?"]]},
|
151
|
+
|
152
|
+
{"description":"Hexadecimal entity in attribute",
|
153
|
+
"input":"<h a='&#x3f;'></h>",
|
154
|
+
"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
|
155
|
+
|
156
|
+
{"description":"Entity in attribute without semicolon ending in x",
|
157
|
+
"input":"<h a='&notx'>",
|
158
|
+
"output":["ParseError", ["StartTag", "h", {"a":"¬x"}]]},
|
159
|
+
|
160
|
+
{"description":"Entity in attribute without semicolon ending in 1",
|
161
|
+
"input":"<h a='&not1'>",
|
162
|
+
"output":["ParseError", ["StartTag", "h", {"a":"¬1"}]]},
|
163
|
+
|
164
|
+
{"description":"Entity in attribute without semicolon ending in i",
|
165
|
+
"input":"<h a='&noti'>",
|
166
|
+
"output":["ParseError", ["StartTag", "h", {"a":"¬i"}]]},
|
167
|
+
|
168
|
+
{"description":"Entity in attribute without semicolon",
|
169
|
+
"input":"<h a='&copy'>",
|
170
|
+
"output":["ParseError", ["StartTag", "h", {"a":"©"}]]}
|
171
|
+
|
172
|
+
]}
|
@@ -0,0 +1,129 @@
|
|
1
|
+
{"tests": [
|
2
|
+
|
3
|
+
{"description":"DOCTYPE without name",
|
4
|
+
"input":"<!DOCTYPE>",
|
5
|
+
"output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]},
|
6
|
+
|
7
|
+
{"description":"DOCTYPE without space before name",
|
8
|
+
"input":"<!DOCTYPEhtml>",
|
9
|
+
"output":["ParseError", ["DOCTYPE", "html", null, null, true]]},
|
10
|
+
|
11
|
+
{"description":"Incorrect DOCTYPE without a space before name",
|
12
|
+
"input":"<!DOCTYPEfoo>",
|
13
|
+
"output":["ParseError", ["DOCTYPE", "foo", null, null, true]]},
|
14
|
+
|
15
|
+
{"description":"DOCTYPE with publicId",
|
16
|
+
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
17
|
+
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
|
18
|
+
|
19
|
+
{"description":"DOCTYPE with EOF after PUBLIC",
|
20
|
+
"input":"<!DOCTYPE html PUBLIC",
|
21
|
+
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
22
|
+
|
23
|
+
{"description":"DOCTYPE with EOF after PUBLIC '",
|
24
|
+
"input":"<!DOCTYPE html PUBLIC '",
|
25
|
+
"output":["ParseError", ["DOCTYPE", "html", "", null, false]]},
|
26
|
+
|
27
|
+
{"description":"DOCTYPE with EOF after PUBLIC 'x",
|
28
|
+
"input":"<!DOCTYPE html PUBLIC 'x",
|
29
|
+
"output":["ParseError", ["DOCTYPE", "html", "x", null, false]]},
|
30
|
+
|
31
|
+
{"description":"DOCTYPE with systemId",
|
32
|
+
"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
33
|
+
"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
|
34
|
+
|
35
|
+
{"description":"DOCTYPE with publicId and systemId",
|
36
|
+
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
37
|
+
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
|
38
|
+
|
39
|
+
{"description":"Incomplete doctype",
|
40
|
+
"input":"<!DOCTYPE html ",
|
41
|
+
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
42
|
+
|
43
|
+
{"description":"Numeric entity representing the NUL character",
|
44
|
+
"input":"&#0000;",
|
45
|
+
"output":["ParseError", ["Character", "\uFFFD"]]},
|
46
|
+
|
47
|
+
{"description":"Hexadecimal entity representing the NUL character",
|
48
|
+
"input":"&#x0000;",
|
49
|
+
"output":["ParseError", ["Character", "\uFFFD"]]},
|
50
|
+
|
51
|
+
{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
|
52
|
+
"input":"&#2225222;",
|
53
|
+
"output":["ParseError", ["Character", "\uFFFD"]]},
|
54
|
+
|
55
|
+
{"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
|
56
|
+
"input":"&#x1010FFFF;",
|
57
|
+
"output":["ParseError", ["Character", "\uFFFD"]]},
|
58
|
+
|
59
|
+
{"description":"Hexadecimal entity pair representing a surrogate pair",
|
60
|
+
"input":"&#xD869;&#xDED6;",
|
61
|
+
"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},
|
62
|
+
|
63
|
+
{"description":"Hexadecimal entity with mixed uppercase and lowercase",
|
64
|
+
"input":"&#xaBcD;",
|
65
|
+
"output":[["Character", "\uABCD"]]},
|
66
|
+
|
67
|
+
{"description":"Entity without a name",
|
68
|
+
"input":"&;",
|
69
|
+
"output":["ParseError", ["Character", "&;"]]},
|
70
|
+
|
71
|
+
{"description":"Unescaped ampersand in attribute value",
|
72
|
+
"input":"<h a='&'>",
|
73
|
+
"output":["ParseError", ["StartTag", "h", { "a":"&" }]]},
|
74
|
+
|
75
|
+
{"description":"StartTag containing <",
|
76
|
+
"input":"<a<b>",
|
77
|
+
"output":[["StartTag", "a<b", { }]]},
|
78
|
+
|
79
|
+
{"description":"Non-void element containing trailing /",
|
80
|
+
"input":"<h/>",
|
81
|
+
"output":["ParseError", ["StartTag", "h", { }]]},
|
82
|
+
|
83
|
+
{"description":"Void element with permitted slash",
|
84
|
+
"input":"<br/>",
|
85
|
+
"output":[["StartTag", "br", { }]]},
|
86
|
+
|
87
|
+
{"description":"StartTag containing /",
|
88
|
+
"input":"<h/a='b'>",
|
89
|
+
"output":["ParseError", ["StartTag", "h", { "a":"b" }]]},
|
90
|
+
|
91
|
+
{"description":"Double-quoted attribute value",
|
92
|
+
"input":"<h a=\"b\">",
|
93
|
+
"output":[["StartTag", "h", { "a":"b" }]]},
|
94
|
+
|
95
|
+
{"description":"Unescaped </",
|
96
|
+
"input":"</",
|
97
|
+
"output":["ParseError", ["Character", "</"]]},
|
98
|
+
|
99
|
+
{"description":"Illegal end tag name",
|
100
|
+
"input":"</1>",
|
101
|
+
"output":["ParseError", ["Comment", "1"]]},
|
102
|
+
|
103
|
+
{"description":"Simili processing instruction",
|
104
|
+
"input":"<?namespace>",
|
105
|
+
"output":["ParseError", ["Comment", "?namespace"]]},
|
106
|
+
|
107
|
+
{"description":"A bogus comment stops at >, even if preceded by two dashes",
|
108
|
+
"input":"<?foo-->",
|
109
|
+
"output":["ParseError", ["Comment", "?foo--"]]},
|
110
|
+
|
111
|
+
{"description":"Unescaped <",
|
112
|
+
"input":"foo < bar",
|
113
|
+
"output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},
|
114
|
+
|
115
|
+
{"description":"Null Byte Replacement",
|
116
|
+
"input":"\u0000",
|
117
|
+
"output":["ParseError", ["Character", "\ufffd"]]},
|
118
|
+
|
119
|
+
{"description":"Comment with dash",
|
120
|
+
"input":"<!---x",
|
121
|
+
"output":["ParseError", ["Comment", "-x"]]},
|
122
|
+
|
123
|
+
{"description":"Entity + newline",
|
124
|
+
"input":"\nx\n&gt;\n",
|
125
|
+
"output":[["Character","\nx\n>\n"]]}
|
126
|
+
|
127
|
+
]}
|
128
|
+
|
129
|
+
|
@@ -0,0 +1,367 @@
|
|
1
|
+
{"tests": [
|
2
|
+
|
3
|
+
{"description":"<",
|
4
|
+
"input":"<",
|
5
|
+
"output":["ParseError", ["Character", "<"]]},
|
6
|
+
|
7
|
+
{"description":"<>",
|
8
|
+
"input":"<>",
|
9
|
+
"output":["ParseError", ["Character", "<>"]]},
|
10
|
+
|
11
|
+
{"description":"<!",
|
12
|
+
"input":"<!",
|
13
|
+
"output":["ParseError", ["Comment", ""]]},
|
14
|
+
|
15
|
+
{"description":"<!>",
|
16
|
+
"input":"<!>",
|
17
|
+
"output":["ParseError", ["Comment", ""]]},
|
18
|
+
|
19
|
+
{"description":"<!--",
|
20
|
+
"input":"<!--",
|
21
|
+
"output":["ParseError", ["Comment", ""]]},
|
22
|
+
|
23
|
+
{"description":"<!-->",
|
24
|
+
"input":"<!-->",
|
25
|
+
"output":["ParseError", ["Comment", ""]]},
|
26
|
+
|
27
|
+
{"description":"<!---",
|
28
|
+
"input":"<!---",
|
29
|
+
"output":["ParseError", ["Comment", ""]]},
|
30
|
+
|
31
|
+
{"description":"<!--->",
|
32
|
+
"input":"<!--->",
|
33
|
+
"output":["ParseError", ["Comment", ""]]},
|
34
|
+
|
35
|
+
{"description":"<!---->",
|
36
|
+
"input":"<!---->",
|
37
|
+
"output":[["Comment", ""]]},
|
38
|
+
|
39
|
+
{"description":"<!-----",
|
40
|
+
"input":"<!-----",
|
41
|
+
"output":["ParseError", "ParseError", ["Comment", "-"]]},
|
42
|
+
|
43
|
+
{"description":"<!----.",
|
44
|
+
"input":"<!----.",
|
45
|
+
"output":["ParseError", "ParseError", ["Comment", "--."]]},
|
46
|
+
|
47
|
+
{"description":"<!---?",
|
48
|
+
"input":"<!---?",
|
49
|
+
"output":["ParseError", ["Comment", "-?"]]},
|
50
|
+
|
51
|
+
{"description":"<!--?-",
|
52
|
+
"input":"<!--?-",
|
53
|
+
"output":["ParseError", ["Comment", "?"]]},
|
54
|
+
|
55
|
+
{"description":"<!--?--",
|
56
|
+
"input":"<!--?--",
|
57
|
+
"output":["ParseError", ["Comment", "?"]]},
|
58
|
+
|
59
|
+
{"description":"<!--?-.",
|
60
|
+
"input":"<!--?-.",
|
61
|
+
"output":["ParseError", ["Comment", "?-."]]},
|
62
|
+
|
63
|
+
{"description":"<!--?.",
|
64
|
+
"input":"<!--?.",
|
65
|
+
"output":["ParseError", ["Comment", "?."]]},
|
66
|
+
|
67
|
+
{"description":"<?>",
|
68
|
+
"input":"<?>",
|
69
|
+
"output":["ParseError", ["Comment", "?"]]},
|
70
|
+
|
71
|
+
{"description":"<??",
|
72
|
+
"input":"<??",
|
73
|
+
"output":["ParseError", ["Comment", "??"]]},
|
74
|
+
|
75
|
+
{"description":"</",
|
76
|
+
"input":"</",
|
77
|
+
"output":["ParseError", ["Character", "</"]]},
|
78
|
+
|
79
|
+
{"description":"</>",
|
80
|
+
"input":"</>",
|
81
|
+
"output":["ParseError"]},
|
82
|
+
|
83
|
+
{"description":"</?",
|
84
|
+
"input":"</?",
|
85
|
+
"output":["ParseError", ["Comment", "?"]]},
|
86
|
+
|
87
|
+
{"description":">",
|
88
|
+
"input":">",
|
89
|
+
"output":[["Character", ">"]]},
|
90
|
+
|
91
|
+
{"description":"-",
|
92
|
+
"input":"-",
|
93
|
+
"output":[["Character", "-"]]},
|
94
|
+
|
95
|
+
{"description":"?",
|
96
|
+
"input":"?",
|
97
|
+
"output":[["Character", "?"]]},
|
98
|
+
|
99
|
+
{"description":"&",
|
100
|
+
"input":"&",
|
101
|
+
"output":[["Character", "&"]]},
|
102
|
+
|
103
|
+
{"description":"&#",
|
104
|
+
"input":"&#",
|
105
|
+
"output":["ParseError", ["Character", "&#"]]},
|
106
|
+
|
107
|
+
{"description":"	",
|
108
|
+
"input":"	",
|
109
|
+
"output":["ParseError", ["Character", "\t"]]},
|
110
|
+
|
111
|
+
{"description":"<!doctype >",
|
112
|
+
"input":"<!doctype >",
|
113
|
+
"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
|
114
|
+
|
115
|
+
{"description":"<!doctype ",
|
116
|
+
"input":"<!doctype ",
|
117
|
+
"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
|
118
|
+
|
119
|
+
{"description":"<!doctype!>",
|
120
|
+
"input":"<!doctype!>",
|
121
|
+
"output":["ParseError", ["DOCTYPE", "!", null, null, true]]},
|
122
|
+
|
123
|
+
{"description":"<!doctype! >",
|
124
|
+
"input":"<!doctype! >",
|
125
|
+
"output":["ParseError", ["DOCTYPE", "!", null, null, true]]},
|
126
|
+
|
127
|
+
{"description":"<!doctype! ",
|
128
|
+
"input":"<!doctype! ",
|
129
|
+
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
|
130
|
+
|
131
|
+
{"description":"<!doctype! ?>",
|
132
|
+
"input":"<!doctype! ?>",
|
133
|
+
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
|
134
|
+
|
135
|
+
{"description":"<!doctype! ??",
|
136
|
+
"input":"<!doctype! ??",
|
137
|
+
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
|
138
|
+
|
139
|
+
{"description":"<!doctype!?",
|
140
|
+
"input":"<!doctype!?",
|
141
|
+
"output":["ParseError", "ParseError", ["DOCTYPE", "!?", null, null, false]]},
|
142
|
+
|
143
|
+
{"description":"<!doctype! public>",
|
144
|
+
"input":"<!doctype! public>",
|
145
|
+
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
|
146
|
+
|
147
|
+
{"description":"<!doctype! public ",
|
148
|
+
"input":"<!doctype! public ",
|
149
|
+
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
|
150
|
+
|
151
|
+
{"description":"<!doctype! public?",
|
152
|
+
"input":"<!doctype! public?",
|
153
|
+
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
|
154
|
+
|
155
|
+
{"description":"<!doctype! public''",
|
156
|
+
"input":"<!doctype! public''",
|
157
|
+
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
|
158
|
+
|
159
|
+
{"description":"<!doctype! public'(",
|
160
|
+
"input":"<!doctype! public'(",
|
161
|
+
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "(", null, false]]},
|
162
|
+
|
163
|
+
{"description":"<!doctype! public\"\">",
|
164
|
+
"input":"<!doctype! public\"\">",
|
165
|
+
"output":["ParseError", ["DOCTYPE", "!", "", null, true]]},
|
166
|
+
|
167
|
+
{"description":"<!doctype! public\"\" ",
|
168
|
+
"input":"<!doctype! public\"\" ",
|
169
|
+
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
|
170
|
+
|
171
|
+
{"description":"<!doctype! public\"\"?",
|
172
|
+
"input":"<!doctype! public\"\"?",
|
173
|
+
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
|
174
|
+
|
175
|
+
{"description":"<!doctype! public\"\"'",
|
176
|
+
"input":"<!doctype! public\"\"'",
|
177
|
+
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", "", false]]},
|
178
|
+
|
179
|
+
{"description":"<!doctype! public\"\"\"",
|
180
|
+
"input":"<!doctype! public\"\"\"",
|
181
|
+
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", "", false]]},
|
182
|
+
|
183
|
+
{"description":"<!doctype! public\"#",
|
184
|
+
"input":"<!doctype! public\"#",
|
185
|
+
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "#", null, false]]},
|
186
|
+
|
187
|
+
{"description":"<!doctype! system>",
|
188
|
+
"input":"<!doctype! system>",
|
189
|
+
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
|
190
|
+
|
191
|
+
{"description":"<!doctype! system ",
|
192
|
+
"input":"<!doctype! system ",
|
193
|
+
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
|
194
|
+
|
195
|
+
{"description":"<!doctype! system?",
|
196
|
+
"input":"<!doctype! system?",
|
197
|
+
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
|
198
|
+
|
199
|
+
{"description":"<!doctype! system''",
|
200
|
+
"input":"<!doctype! system''",
|
201
|
+
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
|
202
|
+
|
203
|
+
{"description":"<!doctype! system'(",
|
204
|
+
"input":"<!doctype! system'(",
|
205
|
+
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "(", false]]},
|
206
|
+
|
207
|
+
{"description":"<!doctype! system\"\">",
|
208
|
+
"input":"<!doctype! system\"\">",
|
209
|
+
"output":["ParseError", ["DOCTYPE", "!", null, "", true]]},
|
210
|
+
|
211
|
+
{"description":"<!doctype! system\"\" ",
|
212
|
+
"input":"<!doctype! system\"\" ",
|
213
|
+
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
|
214
|
+
|
215
|
+
{"description":"<!doctype! system\"\"?",
|
216
|
+
"input":"<!doctype! system\"\"?",
|
217
|
+
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
|
218
|
+
|
219
|
+
{"description":"<!doctype! system\"#",
|
220
|
+
"input":"<!doctype! system\"#",
|
221
|
+
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "#", false]]},
|
222
|
+
|
223
|
+
{"description":"</z",
|
224
|
+
"input":"</z",
|
225
|
+
"output":["ParseError", ["EndTag", "z"]]},
|
226
|
+
|
227
|
+
{"description":"<z>",
|
228
|
+
"input":"<z>",
|
229
|
+
"output":[["StartTag", "z", {}]]},
|
230
|
+
|
231
|
+
{"description":"<z ",
|
232
|
+
"input":"<z ",
|
233
|
+
"output":["ParseError", ["StartTag", "z", {}]]},
|
234
|
+
|
235
|
+
{"description":"<z/>",
|
236
|
+
"input":"<z/>",
|
237
|
+
"output":["ParseError", ["StartTag", "z", {}]]},
|
238
|
+
|
239
|
+
{"description":"<z/ ",
|
240
|
+
"input":"<z/ ",
|
241
|
+
"output":["ParseError", "ParseError", ["StartTag", "z", {}]]},
|
242
|
+
|
243
|
+
{"description":"<z//",
|
244
|
+
"input":"<z//",
|
245
|
+
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {}]]},
|
246
|
+
|
247
|
+
{"description":"<z",
|
248
|
+
"input":"<z",
|
249
|
+
"output":["ParseError", ["StartTag", "z", {}]]},
|
250
|
+
|
251
|
+
{"description":"</z",
|
252
|
+
"input":"</z",
|
253
|
+
"output":["ParseError", ["EndTag", "z"]]},
|
254
|
+
|
255
|
+
{"description":"<z0",
|
256
|
+
"input":"<z0",
|
257
|
+
"output":["ParseError", ["StartTag", "z0", {}]]},
|
258
|
+
|
259
|
+
{"description":"<z/0=>",
|
260
|
+
"input":"<z/0=>",
|
261
|
+
"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
|
262
|
+
|
263
|
+
{"description":"<z/0= ",
|
264
|
+
"input":"<z/0= ",
|
265
|
+
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
|
266
|
+
|
267
|
+
{"description":"<z/0=?>",
|
268
|
+
"input":"<z/0=?>",
|
269
|
+
"output":["ParseError", ["StartTag", "z", {"0": "?"}]]},
|
270
|
+
|
271
|
+
{"description":"<z/0=? ",
|
272
|
+
"input":"<z/0=? ",
|
273
|
+
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "?"}]]},
|
274
|
+
|
275
|
+
{"description":"<z/0=??",
|
276
|
+
"input":"<z/0=??",
|
277
|
+
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "??"}]]},
|
278
|
+
|
279
|
+
{"description":"<z/0=''",
|
280
|
+
"input":"<z/0=''",
|
281
|
+
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
|
282
|
+
|
283
|
+
{"description":"<z/0='&",
|
284
|
+
"input":"<z/0='&",
|
285
|
+
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
|
286
|
+
|
287
|
+
{"description":"<z/0='%",
|
288
|
+
"input":"<z/0='%",
|
289
|
+
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "%"}]]},
|
290
|
+
|
291
|
+
{"description":"<z/0=\"'",
|
292
|
+
"input":"<z/0=\"'",
|
293
|
+
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "'"}]]},
|
294
|
+
|
295
|
+
{"description":"<z/0=\"\"",
|
296
|
+
"input":"<z/0=\"\"",
|
297
|
+
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
|
298
|
+
|
299
|
+
{"description":"<z/0=\"&",
|
300
|
+
"input":"<z/0=\"&",
|
301
|
+
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
|
302
|
+
|
303
|
+
{"description":"<z/0=&",
|
304
|
+
"input":"<z/0=&",
|
305
|
+
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
|
306
|
+
|
307
|
+
{"description":"<z/0>",
|
308
|
+
"input":"<z/0>",
|
309
|
+
"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
|
310
|
+
|
311
|
+
{"description":"<z/0 =",
|
312
|
+
"input":"<z/0 =",
|
313
|
+
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
|
314
|
+
|
315
|
+
{"description":"<z/0 >",
|
316
|
+
"input":"<z/0 >",
|
317
|
+
"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
|
318
|
+
|
319
|
+
{"description":"<z/0 ",
|
320
|
+
"input":"<z/0 ",
|
321
|
+
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
|
322
|
+
|
323
|
+
{"description":"<z/0 /",
|
324
|
+
"input":"<z/0 /",
|
325
|
+
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
|
326
|
+
|
327
|
+
{"description":"<z/0/",
|
328
|
+
"input":"<z/0/",
|
329
|
+
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
|
330
|
+
|
331
|
+
{"description":"<z/00",
|
332
|
+
"input":"<z/00",
|
333
|
+
"output":["ParseError", "ParseError", ["StartTag", "z", {"00": ""}]]},
|
334
|
+
|
335
|
+
{"description":"<z/0 0",
|
336
|
+
"input":"<z/0 0",
|
337
|
+
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
|
338
|
+
|
339
|
+
{"description":"<z/0='	",
|
340
|
+
"input":"<z/0='	",
|
341
|
+
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
|
342
|
+
|
343
|
+
{"description":"<z/0=\"	",
|
344
|
+
"input":"<z/0=\"	",
|
345
|
+
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
|
346
|
+
|
347
|
+
{"description":"<z/0=	",
|
348
|
+
"input":"<z/0=	",
|
349
|
+
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
|
350
|
+
|
351
|
+
{"description":"<z/0z",
|
352
|
+
"input":"<z/0z",
|
353
|
+
"output":["ParseError", "ParseError", ["StartTag", "z", {"0z": ""}]]},
|
354
|
+
|
355
|
+
{"description":"<z/0 z",
|
356
|
+
"input":"<z/0 z",
|
357
|
+
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "z": ""}]]},
|
358
|
+
|
359
|
+
{"description":"<zz",
|
360
|
+
"input":"<zz",
|
361
|
+
"output":["ParseError", ["StartTag", "zz", {}]]},
|
362
|
+
|
363
|
+
{"description":"<z/z",
|
364
|
+
"input":"<z/z",
|
365
|
+
"output":["ParseError", "ParseError", ["StartTag", "z", {"z": ""}]]}
|
366
|
+
|
367
|
+
]}
|