feedtools 0.2.26 → 0.2.27

Sign up to get free protection for your applications and to get access to all the features.
Files changed (166) hide show
  1. data/CHANGELOG +232 -216
  2. data/db/migration.rb +2 -0
  3. data/db/schema.mysql.sql +2 -0
  4. data/db/schema.postgresql.sql +3 -1
  5. data/db/schema.sqlite.sql +3 -1
  6. data/lib/feed_tools.rb +37 -14
  7. data/lib/feed_tools/database_feed_cache.rb +13 -2
  8. data/lib/feed_tools/feed.rb +430 -104
  9. data/lib/feed_tools/feed_item.rb +533 -268
  10. data/lib/feed_tools/helpers/generic_helper.rb +1 -1
  11. data/lib/feed_tools/helpers/html_helper.rb +78 -116
  12. data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
  13. data/lib/feed_tools/helpers/uri_helper.rb +46 -54
  14. data/lib/feed_tools/monkey_patch.rb +27 -1
  15. data/lib/feed_tools/vendor/html5/History.txt +10 -0
  16. data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
  17. data/lib/feed_tools/vendor/html5/README +45 -0
  18. data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
  19. data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
  20. data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
  21. data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
  22. data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
  23. data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
  24. data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
  25. data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
  26. data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
  27. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
  28. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
  29. data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
  30. data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
  31. data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
  32. data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
  33. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
  34. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
  35. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
  36. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
  37. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
  38. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
  39. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
  40. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  41. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  42. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
  43. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
  44. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
  45. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
  46. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
  47. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
  48. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
  49. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
  50. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  51. data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
  52. data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
  53. data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
  54. data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
  55. data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
  56. data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
  57. data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
  58. data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
  59. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
  60. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
  61. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
  62. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
  63. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
  64. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
  65. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
  66. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
  67. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
  68. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
  69. data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
  70. data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
  71. data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
  72. data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
  73. data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
  74. data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
  75. data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
  76. data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
  77. data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
  78. data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
  79. data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
  80. data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
  81. data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
  82. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
  83. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
  84. data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
  85. data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
  86. data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
  87. data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
  88. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
  89. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
  90. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
  91. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
  92. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
  93. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
  94. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
  95. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
  96. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
  97. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
  98. data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
  99. data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
  100. data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
  101. data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
  102. data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
  103. data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
  104. data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
  105. data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
  106. data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
  107. data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
  108. data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
  109. data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
  110. data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
  111. data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
  112. data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
  113. data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
  114. data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
  115. data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
  116. data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
  117. data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
  118. data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
  119. data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
  120. data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
  121. data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
  122. data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
  123. data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
  124. data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
  125. data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
  126. data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
  127. data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
  128. data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
  129. data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
  130. data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
  131. data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
  132. data/lib/feed_tools/vendor/uri.rb +781 -0
  133. data/lib/feed_tools/version.rb +1 -1
  134. data/rakefile +27 -6
  135. data/test/unit/atom_test.rb +298 -210
  136. data/test/unit/helper_test.rb +7 -12
  137. data/test/unit/rdf_test.rb +51 -1
  138. data/test/unit/rss_test.rb +13 -3
  139. metadata +239 -116
  140. data/lib/feed_tools/vendor/htree.rb +0 -97
  141. data/lib/feed_tools/vendor/htree/container.rb +0 -10
  142. data/lib/feed_tools/vendor/htree/context.rb +0 -67
  143. data/lib/feed_tools/vendor/htree/display.rb +0 -27
  144. data/lib/feed_tools/vendor/htree/doc.rb +0 -149
  145. data/lib/feed_tools/vendor/htree/elem.rb +0 -262
  146. data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
  147. data/lib/feed_tools/vendor/htree/equality.rb +0 -218
  148. data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
  149. data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
  150. data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
  151. data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
  152. data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
  153. data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
  154. data/lib/feed_tools/vendor/htree/loc.rb +0 -367
  155. data/lib/feed_tools/vendor/htree/modules.rb +0 -48
  156. data/lib/feed_tools/vendor/htree/name.rb +0 -124
  157. data/lib/feed_tools/vendor/htree/output.rb +0 -207
  158. data/lib/feed_tools/vendor/htree/parse.rb +0 -409
  159. data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
  160. data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
  161. data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
  162. data/lib/feed_tools/vendor/htree/scan.rb +0 -166
  163. data/lib/feed_tools/vendor/htree/tag.rb +0 -111
  164. data/lib/feed_tools/vendor/htree/template.rb +0 -909
  165. data/lib/feed_tools/vendor/htree/text.rb +0 -115
  166. data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -0,0 +1,21 @@
1
+ {"tests": [
2
+
3
+ {"description":"Commented close tag in [R]CDATA",
4
+ "contentModelFlags":["RCDATA", "CDATA"],
5
+ "lastStartTag":"bar",
6
+ "input":"foo<!--</bar>--></bar>",
7
+ "output":[["Character", "foo<!--</bar>-->"], ["EndTag", "bar"]]},
8
+
9
+ {"description":"Bogus comment in [R]CDATA",
10
+ "contentModelFlags":["RCDATA", "CDATA"],
11
+ "lastStartTag":"bar",
12
+ "input":"foo<!-->baz</bar>",
13
+ "output":[["Character", "foo<!-->baz"], ["EndTag", "bar"]]},
14
+
15
+ {"description":"End tag surrounded by bogus comment in [R]CDATA",
16
+ "contentModelFlags":["RCDATA", "CDATA"],
17
+ "lastStartTag":"bar",
18
+ "input":"foo<!--></bar><!-->baz</bar>",
19
+ "output":[["Character", "foo<!-->"], ["EndTag", "bar"], "ParseError", ["Comment", ""], ["Character", "baz"], ["EndTag", "bar"]]}
20
+
21
+ ]}
@@ -0,0 +1,172 @@
1
+ {"tests": [
2
+
3
+ {"description":"Correct Doctype lowercase",
4
+ "input":"<!DOCTYPE html>",
5
+ "output":[["DOCTYPE", "html", null, null, true]]},
6
+
7
+ {"description":"Correct Doctype uppercase",
8
+ "input":"<!DOCTYPE HTML>",
9
+ "output":[["DOCTYPE", "HTML", null, null, true]]},
10
+
11
+ {"description":"Correct Doctype mixed case",
12
+ "input":"<!DOCTYPE HtMl>",
13
+ "output":[["DOCTYPE", "HtMl", null, null, true]]},
14
+
15
+ {"description":"Truncated doctype start",
16
+ "input":"<!DOC>",
17
+ "output":["ParseError", ["Comment", "DOC"]]},
18
+
19
+ {"description":"Doctype in error",
20
+ "input":"<!DOCTYPE foo>",
21
+ "output":[["DOCTYPE", "foo", null, null, true]]},
22
+
23
+ {"description":"Single Start Tag",
24
+ "input":"<h>",
25
+ "output":[["StartTag", "h", {}]]},
26
+
27
+ {"description":"Empty end tag",
28
+ "input":"</>",
29
+ "output":["ParseError"]},
30
+
31
+ {"description":"Empty start tag",
32
+ "input":"<>",
33
+ "output":["ParseError", ["Character", "<>"]]},
34
+
35
+ {"description":"Start Tag w/attribute",
36
+ "input":"<h a='b'>",
37
+ "output":[["StartTag", "h", {"a":"b"}]]},
38
+
39
+ {"description":"Start Tag w/attribute no quotes",
40
+ "input":"<h a=b>",
41
+ "output":[["StartTag", "h", {"a":"b"}]]},
42
+
43
+ {"description":"Start/End Tag",
44
+ "input":"<h></h>",
45
+ "output":[["StartTag", "h", {}], ["EndTag", "h"]]},
46
+
47
+ {"description":"Two unclosed start tags",
48
+ "input":"<p>One<p>Two",
49
+ "output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
50
+
51
+ {"description":"End Tag w/attribute",
52
+ "input":"<h></h a='b'>",
53
+ "output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]},
54
+
55
+ {"description":"Multiple atts",
56
+ "input":"<h a='b' c='d'>",
57
+ "output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
58
+
59
+ {"description":"Multiple atts no space",
60
+ "input":"<h a='b'c='d'>",
61
+ "output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
62
+
63
+ {"description":"Repeated attr",
64
+ "input":"<h a='b' a='d'>",
65
+ "output":["ParseError", ["StartTag", "h", {"a":"b"}]]},
66
+
67
+ {"description":"Simple comment",
68
+ "input":"<!--comment-->",
69
+ "output":[["Comment", "comment"]]},
70
+
71
+ {"description":"Comment, Central dash no space",
72
+ "input":"<!----->",
73
+ "output":["ParseError", ["Comment", "-"]]},
74
+
75
+ {"description":"Comment, two central dashes",
76
+ "input":"<!-- --comment -->",
77
+ "output":["ParseError", ["Comment", " --comment "]]},
78
+
79
+ {"description":"Unfinished comment",
80
+ "input":"<!--comment",
81
+ "output":["ParseError", ["Comment", "comment"]]},
82
+
83
+ {"description":"Start of a comment",
84
+ "input":"<!-",
85
+ "output":["ParseError", ["Comment", "-"]]},
86
+
87
+ {"description":"Short comment",
88
+ "input":"<!-->",
89
+ "output":["ParseError", ["Comment", ""]]},
90
+
91
+ {"description":"Short comment two",
92
+ "input":"<!--->",
93
+ "output":["ParseError", ["Comment", ""]]},
94
+
95
+ {"description":"Short comment three",
96
+ "input":"<!---->",
97
+ "output":[["Comment", ""]]},
98
+
99
+
100
+ {"description":"Ampersand EOF",
101
+ "input":"&",
102
+ "output":[["Character", "&"]]},
103
+
104
+ {"description":"Ampersand ampersand EOF",
105
+ "input":"&&",
106
+ "output":[["Character", "&&"]]},
107
+
108
+ {"description":"Ampersand space EOF",
109
+ "input":"& ",
110
+ "output":[["Character", "& "]]},
111
+
112
+ {"description":"Unfinished entity",
113
+ "input":"&f",
114
+ "output":["ParseError", ["Character", "&f"]]},
115
+
116
+ {"description":"Ampersand, number sign",
117
+ "input":"&#",
118
+ "output":["ParseError", ["Character", "&#"]]},
119
+
120
+ {"description":"Unfinished numeric entity",
121
+ "input":"&#x",
122
+ "output":["ParseError", ["Character", "&#x"]]},
123
+
124
+ {"description":"Entity with trailing semicolon (1)",
125
+ "input":"I'm &not;it",
126
+ "output":[["Character","I'm ¬it"]]},
127
+
128
+ {"description":"Entity with trailing semicolon (2)",
129
+ "input":"I'm &notin;",
130
+ "output":[["Character","I'm ∉"]]},
131
+
132
+ {"description":"Entity without trailing semicolon (1)",
133
+ "input":"I'm &notit",
134
+ "output":[["Character","I'm "], "ParseError", ["Character", "¬it"]]},
135
+
136
+ {"description":"Entity without trailing semicolon (2)",
137
+ "input":"I'm &notin",
138
+ "output":[["Character","I'm "], "ParseError", ["Character", "¬in"]]},
139
+
140
+ {"description":"Partial entity match at end of file",
141
+ "input":"I'm &no",
142
+ "output":[["Character","I'm "], "ParseError", ["Character", "&no"]]},
143
+
144
+ {"description":"ASCII decimal entity",
145
+ "input":"&#0036;",
146
+ "output":[["Character","$"]]},
147
+
148
+ {"description":"ASCII hexadecimal entity",
149
+ "input":"&#x3f;",
150
+ "output":[["Character","?"]]},
151
+
152
+ {"description":"Hexadecimal entity in attribute",
153
+ "input":"<h a='&#x3f;'></h>",
154
+ "output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
155
+
156
+ {"description":"Entity in attribute without semicolon ending in x",
157
+ "input":"<h a='&notx'>",
158
+ "output":["ParseError", ["StartTag", "h", {"a":"&notx"}]]},
159
+
160
+ {"description":"Entity in attribute without semicolon ending in 1",
161
+ "input":"<h a='&not1'>",
162
+ "output":["ParseError", ["StartTag", "h", {"a":"&not1"}]]},
163
+
164
+ {"description":"Entity in attribute without semicolon ending in i",
165
+ "input":"<h a='&noti'>",
166
+ "output":["ParseError", ["StartTag", "h", {"a":"&noti"}]]},
167
+
168
+ {"description":"Entity in attribute without semicolon",
169
+ "input":"<h a='&COPY'>",
170
+ "output":["ParseError", ["StartTag", "h", {"a":"©"}]]}
171
+
172
+ ]}
@@ -0,0 +1,129 @@
1
+ {"tests": [
2
+
3
+ {"description":"DOCTYPE without name",
4
+ "input":"<!DOCTYPE>",
5
+ "output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]},
6
+
7
+ {"description":"DOCTYPE without space before name",
8
+ "input":"<!DOCTYPEhtml>",
9
+ "output":["ParseError", ["DOCTYPE", "html", null, null, true]]},
10
+
11
+ {"description":"Incorrect DOCTYPE without a space before name",
12
+ "input":"<!DOCTYPEfoo>",
13
+ "output":["ParseError", ["DOCTYPE", "foo", null, null, true]]},
14
+
15
+ {"description":"DOCTYPE with publicId",
16
+ "input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
17
+ "output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
18
+
19
+ {"description":"DOCTYPE with EOF after PUBLIC",
20
+ "input":"<!DOCTYPE html PUBLIC",
21
+ "output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
22
+
23
+ {"description":"DOCTYPE with EOF after PUBLIC '",
24
+ "input":"<!DOCTYPE html PUBLIC '",
25
+ "output":["ParseError", ["DOCTYPE", "html", "", null, false]]},
26
+
27
+ {"description":"DOCTYPE with EOF after PUBLIC 'x",
28
+ "input":"<!DOCTYPE html PUBLIC 'x",
29
+ "output":["ParseError", ["DOCTYPE", "html", "x", null, false]]},
30
+
31
+ {"description":"DOCTYPE with systemId",
32
+ "input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
33
+ "output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
34
+
35
+ {"description":"DOCTYPE with publicId and systemId",
36
+ "input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
37
+ "output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
38
+
39
+ {"description":"Incomplete doctype",
40
+ "input":"<!DOCTYPE html ",
41
+ "output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
42
+
43
+ {"description":"Numeric entity representing the NUL character",
44
+ "input":"&#0000;",
45
+ "output":["ParseError", ["Character", "\uFFFD"]]},
46
+
47
+ {"description":"Hexadecimal entity representing the NUL character",
48
+ "input":"&#x0000;",
49
+ "output":["ParseError", ["Character", "\uFFFD"]]},
50
+
51
+ {"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
52
+ "input":"&#2225222;",
53
+ "output":["ParseError", ["Character", "\uFFFD"]]},
54
+
55
+ {"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
56
+ "input":"&#x1010FFFF;",
57
+ "output":["ParseError", ["Character", "\uFFFD"]]},
58
+
59
+ {"description":"Hexadecimal entity pair representing a surrogate pair",
60
+ "input":"&#xD869;&#xDED6;",
61
+ "output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},
62
+
63
+ {"description":"Hexadecimal entity with mixed uppercase and lowercase",
64
+ "input":"&#xaBcD;",
65
+ "output":[["Character", "\uABCD"]]},
66
+
67
+ {"description":"Entity without a name",
68
+ "input":"&;",
69
+ "output":["ParseError", ["Character", "&;"]]},
70
+
71
+ {"description":"Unescaped ampersand in attribute value",
72
+ "input":"<h a='&'>",
73
+ "output":["ParseError", ["StartTag", "h", { "a":"&" }]]},
74
+
75
+ {"description":"StartTag containing <",
76
+ "input":"<a<b>",
77
+ "output":[["StartTag", "a<b", { }]]},
78
+
79
+ {"description":"Non-void element containing trailing /",
80
+ "input":"<h/>",
81
+ "output":["ParseError", ["StartTag", "h", { }]]},
82
+
83
+ {"description":"Void element with permitted slash",
84
+ "input":"<br/>",
85
+ "output":[["StartTag", "br", { }]]},
86
+
87
+ {"description":"StartTag containing /",
88
+ "input":"<h/a='b'>",
89
+ "output":["ParseError", ["StartTag", "h", { "a":"b" }]]},
90
+
91
+ {"description":"Double-quoted attribute value",
92
+ "input":"<h a=\"b\">",
93
+ "output":[["StartTag", "h", { "a":"b" }]]},
94
+
95
+ {"description":"Unescaped </",
96
+ "input":"</",
97
+ "output":["ParseError", ["Character", "</"]]},
98
+
99
+ {"description":"Illegal end tag name",
100
+ "input":"</1>",
101
+ "output":["ParseError", ["Comment", "1"]]},
102
+
103
+ {"description":"Simili processing instruction",
104
+ "input":"<?namespace>",
105
+ "output":["ParseError", ["Comment", "?namespace"]]},
106
+
107
+ {"description":"A bogus comment stops at >, even if preceeded by two dashes",
108
+ "input":"<?foo-->",
109
+ "output":["ParseError", ["Comment", "?foo--"]]},
110
+
111
+ {"description":"Unescaped <",
112
+ "input":"foo < bar",
113
+ "output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},
114
+
115
+ {"description":"Null Byte Replacement",
116
+ "input":"\u0000",
117
+ "output":["ParseError", ["Character", "\ufffd"]]},
118
+
119
+ {"description":"Comment with dash",
120
+ "input":"<!---x",
121
+ "output":["ParseError", ["Comment", "-x"]]},
122
+
123
+ {"description":"Entity + newline",
124
+ "input":"\nx\n&gt;\n",
125
+ "output":[["Character","\nx\n>\n"]]}
126
+
127
+ ]}
128
+
129
+
@@ -0,0 +1,367 @@
1
+ {"tests": [
2
+
3
+ {"description":"<",
4
+ "input":"<",
5
+ "output":["ParseError", ["Character", "<"]]},
6
+
7
+ {"description":"<>",
8
+ "input":"<>",
9
+ "output":["ParseError", ["Character", "<>"]]},
10
+
11
+ {"description":"<!",
12
+ "input":"<!",
13
+ "output":["ParseError", ["Comment", ""]]},
14
+
15
+ {"description":"<!>",
16
+ "input":"<!>",
17
+ "output":["ParseError", ["Comment", ""]]},
18
+
19
+ {"description":"<!--",
20
+ "input":"<!--",
21
+ "output":["ParseError", ["Comment", ""]]},
22
+
23
+ {"description":"<!-->",
24
+ "input":"<!-->",
25
+ "output":["ParseError", ["Comment", ""]]},
26
+
27
+ {"description":"<!---",
28
+ "input":"<!---",
29
+ "output":["ParseError", ["Comment", ""]]},
30
+
31
+ {"description":"<!--->",
32
+ "input":"<!--->",
33
+ "output":["ParseError", ["Comment", ""]]},
34
+
35
+ {"description":"<!---->",
36
+ "input":"<!---->",
37
+ "output":[["Comment", ""]]},
38
+
39
+ {"description":"<!-----",
40
+ "input":"<!-----",
41
+ "output":["ParseError", "ParseError", ["Comment", "-"]]},
42
+
43
+ {"description":"<!----.",
44
+ "input":"<!----.",
45
+ "output":["ParseError", "ParseError", ["Comment", "--."]]},
46
+
47
+ {"description":"<!---?",
48
+ "input":"<!---?",
49
+ "output":["ParseError", ["Comment", "-?"]]},
50
+
51
+ {"description":"<!--?-",
52
+ "input":"<!--?-",
53
+ "output":["ParseError", ["Comment", "?"]]},
54
+
55
+ {"description":"<!--?--",
56
+ "input":"<!--?--",
57
+ "output":["ParseError", ["Comment", "?"]]},
58
+
59
+ {"description":"<!--?-.",
60
+ "input":"<!--?-.",
61
+ "output":["ParseError", ["Comment", "?-."]]},
62
+
63
+ {"description":"<!--?.",
64
+ "input":"<!--?.",
65
+ "output":["ParseError", ["Comment", "?."]]},
66
+
67
+ {"description":"<?>",
68
+ "input":"<?>",
69
+ "output":["ParseError", ["Comment", "?"]]},
70
+
71
+ {"description":"<??",
72
+ "input":"<??",
73
+ "output":["ParseError", ["Comment", "??"]]},
74
+
75
+ {"description":"</",
76
+ "input":"</",
77
+ "output":["ParseError", ["Character", "</"]]},
78
+
79
+ {"description":"</>",
80
+ "input":"</>",
81
+ "output":["ParseError"]},
82
+
83
+ {"description":"</?",
84
+ "input":"</?",
85
+ "output":["ParseError", ["Comment", "?"]]},
86
+
87
+ {"description":">",
88
+ "input":">",
89
+ "output":[["Character", ">"]]},
90
+
91
+ {"description":"-",
92
+ "input":"-",
93
+ "output":[["Character", "-"]]},
94
+
95
+ {"description":"?",
96
+ "input":"?",
97
+ "output":[["Character", "?"]]},
98
+
99
+ {"description":"&",
100
+ "input":"&",
101
+ "output":[["Character", "&"]]},
102
+
103
+ {"description":"&#",
104
+ "input":"&#",
105
+ "output":["ParseError", ["Character", "&#"]]},
106
+
107
+ {"description":"&#9",
108
+ "input":"&#9",
109
+ "output":["ParseError", ["Character", "\t"]]},
110
+
111
+ {"description":"<!doctype >",
112
+ "input":"<!doctype >",
113
+ "output":["ParseError", ["DOCTYPE", "", null, null, false]]},
114
+
115
+ {"description":"<!doctype ",
116
+ "input":"<!doctype ",
117
+ "output":["ParseError", ["DOCTYPE", "", null, null, false]]},
118
+
119
+ {"description":"<!doctype!>",
120
+ "input":"<!doctype!>",
121
+ "output":["ParseError", ["DOCTYPE", "!", null, null, true]]},
122
+
123
+ {"description":"<!doctype! >",
124
+ "input":"<!doctype! >",
125
+ "output":["ParseError", ["DOCTYPE", "!", null, null, true]]},
126
+
127
+ {"description":"<!doctype! ",
128
+ "input":"<!doctype! ",
129
+ "output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
130
+
131
+ {"description":"<!doctype! ?>",
132
+ "input":"<!doctype! ?>",
133
+ "output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
134
+
135
+ {"description":"<!doctype! ??",
136
+ "input":"<!doctype! ??",
137
+ "output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
138
+
139
+ {"description":"<!doctype!?",
140
+ "input":"<!doctype!?",
141
+ "output":["ParseError", "ParseError", ["DOCTYPE", "!?", null, null, false]]},
142
+
143
+ {"description":"<!doctype! public>",
144
+ "input":"<!doctype! public>",
145
+ "output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
146
+
147
+ {"description":"<!doctype! public ",
148
+ "input":"<!doctype! public ",
149
+ "output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
150
+
151
+ {"description":"<!doctype! public?",
152
+ "input":"<!doctype! public?",
153
+ "output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
154
+
155
+ {"description":"<!doctype! public''",
156
+ "input":"<!doctype! public''",
157
+ "output":["ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
158
+
159
+ {"description":"<!doctype! public'(",
160
+ "input":"<!doctype! public'(",
161
+ "output":["ParseError", "ParseError", ["DOCTYPE", "!", "(", null, false]]},
162
+
163
+ {"description":"<!doctype! public\"\">",
164
+ "input":"<!doctype! public\"\">",
165
+ "output":["ParseError", ["DOCTYPE", "!", "", null, true]]},
166
+
167
+ {"description":"<!doctype! public\"\" ",
168
+ "input":"<!doctype! public\"\" ",
169
+ "output":["ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
170
+
171
+ {"description":"<!doctype! public\"\"?",
172
+ "input":"<!doctype! public\"\"?",
173
+ "output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
174
+
175
+ {"description":"<!doctype! public\"\"'",
176
+ "input":"<!doctype! public\"\"'",
177
+ "output":["ParseError", "ParseError", ["DOCTYPE", "!", "", "", false]]},
178
+
179
+ {"description":"<!doctype! public\"\"\"",
180
+ "input":"<!doctype! public\"\"\"",
181
+ "output":["ParseError", "ParseError", ["DOCTYPE", "!", "", "", false]]},
182
+
183
+ {"description":"<!doctype! public\"#",
184
+ "input":"<!doctype! public\"#",
185
+ "output":["ParseError", "ParseError", ["DOCTYPE", "!", "#", null, false]]},
186
+
187
+ {"description":"<!doctype! system>",
188
+ "input":"<!doctype! system>",
189
+ "output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
190
+
191
+ {"description":"<!doctype! system ",
192
+ "input":"<!doctype! system ",
193
+ "output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
194
+
195
+ {"description":"<!doctype! system?",
196
+ "input":"<!doctype! system?",
197
+ "output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
198
+
199
+ {"description":"<!doctype! system''",
200
+ "input":"<!doctype! system''",
201
+ "output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
202
+
203
+ {"description":"<!doctype! system'(",
204
+ "input":"<!doctype! system'(",
205
+ "output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "(", false]]},
206
+
207
+ {"description":"<!doctype! system\"\">",
208
+ "input":"<!doctype! system\"\">",
209
+ "output":["ParseError", ["DOCTYPE", "!", null, "", true]]},
210
+
211
+ {"description":"<!doctype! system\"\" ",
212
+ "input":"<!doctype! system\"\" ",
213
+ "output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
214
+
215
+ {"description":"<!doctype! system\"\"?",
216
+ "input":"<!doctype! system\"\"?",
217
+ "output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
218
+
219
+ {"description":"<!doctype! system\"#",
220
+ "input":"<!doctype! system\"#",
221
+ "output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "#", false]]},
222
+
223
+ {"description":"</z",
224
+ "input":"</z",
225
+ "output":["ParseError", ["EndTag", "z"]]},
226
+
227
+ {"description":"<z>",
228
+ "input":"<z>",
229
+ "output":[["StartTag", "z", {}]]},
230
+
231
+ {"description":"<z ",
232
+ "input":"<z ",
233
+ "output":["ParseError", ["StartTag", "z", {}]]},
234
+
235
+ {"description":"<z/>",
236
+ "input":"<z/>",
237
+ "output":["ParseError", ["StartTag", "z", {}]]},
238
+
239
+ {"description":"<z/ ",
240
+ "input":"<z/ ",
241
+ "output":["ParseError", "ParseError", ["StartTag", "z", {}]]},
242
+
243
+ {"description":"<z//",
244
+ "input":"<z//",
245
+ "output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {}]]},
246
+
247
+ {"description":"<z",
248
+ "input":"<z",
249
+ "output":["ParseError", ["StartTag", "z", {}]]},
250
+
251
+ {"description":"</z",
252
+ "input":"</z",
253
+ "output":["ParseError", ["EndTag", "z"]]},
254
+
255
+ {"description":"<z0",
256
+ "input":"<z0",
257
+ "output":["ParseError", ["StartTag", "z0", {}]]},
258
+
259
+ {"description":"<z/0=>",
260
+ "input":"<z/0=>",
261
+ "output":["ParseError", ["StartTag", "z", {"0": ""}]]},
262
+
263
+ {"description":"<z/0= ",
264
+ "input":"<z/0= ",
265
+ "output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
266
+
267
+ {"description":"<z/0=?>",
268
+ "input":"<z/0=?>",
269
+ "output":["ParseError", ["StartTag", "z", {"0": "?"}]]},
270
+
271
+ {"description":"<z/0=? ",
272
+ "input":"<z/0=? ",
273
+ "output":["ParseError", "ParseError", ["StartTag", "z", {"0": "?"}]]},
274
+
275
+ {"description":"<z/0=??",
276
+ "input":"<z/0=??",
277
+ "output":["ParseError", "ParseError", ["StartTag", "z", {"0": "??"}]]},
278
+
279
+ {"description":"<z/0=''",
280
+ "input":"<z/0=''",
281
+ "output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
282
+
283
+ {"description":"<z/0='&",
284
+ "input":"<z/0='&",
285
+ "output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
286
+
287
+ {"description":"<z/0='%",
288
+ "input":"<z/0='%",
289
+ "output":["ParseError", "ParseError", ["StartTag", "z", {"0": "%"}]]},
290
+
291
+ {"description":"<z/0=\"'",
292
+ "input":"<z/0=\"'",
293
+ "output":["ParseError", "ParseError", ["StartTag", "z", {"0": "'"}]]},
294
+
295
+ {"description":"<z/0=\"\"",
296
+ "input":"<z/0=\"\"",
297
+ "output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
298
+
299
+ {"description":"<z/0=\"&",
300
+ "input":"<z/0=\"&",
301
+ "output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
302
+
303
+ {"description":"<z/0=&",
304
+ "input":"<z/0=&",
305
+ "output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
306
+
307
+ {"description":"<z/0>",
308
+ "input":"<z/0>",
309
+ "output":["ParseError", ["StartTag", "z", {"0": ""}]]},
310
+
311
+ {"description":"<z/0 =",
312
+ "input":"<z/0 =",
313
+ "output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
314
+
315
+ {"description":"<z/0 >",
316
+ "input":"<z/0 >",
317
+ "output":["ParseError", ["StartTag", "z", {"0": ""}]]},
318
+
319
+ {"description":"<z/0 ",
320
+ "input":"<z/0 ",
321
+ "output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
322
+
323
+ {"description":"<z/0 /",
324
+ "input":"<z/0 /",
325
+ "output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
326
+
327
+ {"description":"<z/0/",
328
+ "input":"<z/0/",
329
+ "output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
330
+
331
+ {"description":"<z/00",
332
+ "input":"<z/00",
333
+ "output":["ParseError", "ParseError", ["StartTag", "z", {"00": ""}]]},
334
+
335
+ {"description":"<z/0 0",
336
+ "input":"<z/0 0",
337
+ "output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
338
+
339
+ {"description":"<z/0='&#9",
340
+ "input":"<z/0='&#9",
341
+ "output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
342
+
343
+ {"description":"<z/0=\"&#9",
344
+ "input":"<z/0=\"&#9",
345
+ "output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
346
+
347
+ {"description":"<z/0=&#9",
348
+ "input":"<z/0=&#9",
349
+ "output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
350
+
351
+ {"description":"<z/0z",
352
+ "input":"<z/0z",
353
+ "output":["ParseError", "ParseError", ["StartTag", "z", {"0z": ""}]]},
354
+
355
+ {"description":"<z/0 z",
356
+ "input":"<z/0 z",
357
+ "output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "z": ""}]]},
358
+
359
+ {"description":"<zz",
360
+ "input":"<zz",
361
+ "output":["ParseError", ["StartTag", "zz", {}]]},
362
+
363
+ {"description":"<z/z",
364
+ "input":"<z/z",
365
+ "output":["ParseError", "ParseError", ["StartTag", "z", {"z": ""}]]}
366
+
367
+ ]}