feedtools 0.2.26 → 0.2.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. data/CHANGELOG +232 -216
  2. data/db/migration.rb +2 -0
  3. data/db/schema.mysql.sql +2 -0
  4. data/db/schema.postgresql.sql +3 -1
  5. data/db/schema.sqlite.sql +3 -1
  6. data/lib/feed_tools.rb +37 -14
  7. data/lib/feed_tools/database_feed_cache.rb +13 -2
  8. data/lib/feed_tools/feed.rb +430 -104
  9. data/lib/feed_tools/feed_item.rb +533 -268
  10. data/lib/feed_tools/helpers/generic_helper.rb +1 -1
  11. data/lib/feed_tools/helpers/html_helper.rb +78 -116
  12. data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
  13. data/lib/feed_tools/helpers/uri_helper.rb +46 -54
  14. data/lib/feed_tools/monkey_patch.rb +27 -1
  15. data/lib/feed_tools/vendor/html5/History.txt +10 -0
  16. data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
  17. data/lib/feed_tools/vendor/html5/README +45 -0
  18. data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
  19. data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
  20. data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
  21. data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
  22. data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
  23. data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
  24. data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
  25. data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
  26. data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
  27. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
  28. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
  29. data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
  30. data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
  31. data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
  32. data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
  33. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
  34. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
  35. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
  36. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
  37. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
  38. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
  39. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
  40. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  41. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  42. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
  43. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
  44. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
  45. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
  46. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
  47. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
  48. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
  49. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
  50. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  51. data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
  52. data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
  53. data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
  54. data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
  55. data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
  56. data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
  57. data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
  58. data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
  59. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
  60. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
  61. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
  62. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
  63. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
  64. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
  65. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
  66. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
  67. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
  68. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
  69. data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
  70. data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
  71. data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
  72. data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
  73. data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
  74. data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
  75. data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
  76. data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
  77. data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
  78. data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
  79. data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
  80. data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
  81. data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
  82. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
  83. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
  84. data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
  85. data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
  86. data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
  87. data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
  88. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
  89. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
  90. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
  91. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
  92. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
  93. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
  94. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
  95. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
  96. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
  97. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
  98. data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
  99. data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
  100. data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
  101. data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
  102. data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
  103. data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
  104. data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
  105. data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
  106. data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
  107. data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
  108. data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
  109. data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
  110. data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
  111. data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
  112. data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
  113. data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
  114. data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
  115. data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
  116. data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
  117. data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
  118. data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
  119. data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
  120. data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
  121. data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
  122. data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
  123. data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
  124. data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
  125. data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
  126. data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
  127. data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
  128. data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
  129. data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
  130. data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
  131. data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
  132. data/lib/feed_tools/vendor/uri.rb +781 -0
  133. data/lib/feed_tools/version.rb +1 -1
  134. data/rakefile +27 -6
  135. data/test/unit/atom_test.rb +298 -210
  136. data/test/unit/helper_test.rb +7 -12
  137. data/test/unit/rdf_test.rb +51 -1
  138. data/test/unit/rss_test.rb +13 -3
  139. metadata +239 -116
  140. data/lib/feed_tools/vendor/htree.rb +0 -97
  141. data/lib/feed_tools/vendor/htree/container.rb +0 -10
  142. data/lib/feed_tools/vendor/htree/context.rb +0 -67
  143. data/lib/feed_tools/vendor/htree/display.rb +0 -27
  144. data/lib/feed_tools/vendor/htree/doc.rb +0 -149
  145. data/lib/feed_tools/vendor/htree/elem.rb +0 -262
  146. data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
  147. data/lib/feed_tools/vendor/htree/equality.rb +0 -218
  148. data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
  149. data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
  150. data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
  151. data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
  152. data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
  153. data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
  154. data/lib/feed_tools/vendor/htree/loc.rb +0 -367
  155. data/lib/feed_tools/vendor/htree/modules.rb +0 -48
  156. data/lib/feed_tools/vendor/htree/name.rb +0 -124
  157. data/lib/feed_tools/vendor/htree/output.rb +0 -207
  158. data/lib/feed_tools/vendor/htree/parse.rb +0 -409
  159. data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
  160. data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
  161. data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
  162. data/lib/feed_tools/vendor/htree/scan.rb +0 -166
  163. data/lib/feed_tools/vendor/htree/tag.rb +0 -111
  164. data/lib/feed_tools/vendor/htree/template.rb +0 -909
  165. data/lib/feed_tools/vendor/htree/text.rb +0 -115
  166. data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -0,0 +1,773 @@
1
+ #data
2
+ <!DOCTYPE HTML>Test
3
+ #errors
4
+ #document
5
+ | <!DOCTYPE HTML>
6
+ | <html>
7
+ | <head>
8
+ | <body>
9
+ | "Test"
10
+
11
+ #data
12
+ <textarea>test</div>test
13
+ #errors
14
+ 10: missing document type declaration.
15
+ 25: unexpected end of file while parsing CDATA section for element textarea.
16
+ #document
17
+ | <html>
18
+ | <head>
19
+ | <body>
20
+ | <textarea>
21
+ | "test</div>test"
22
+
23
+ #data
24
+ <table><td>
25
+ #errors
26
+ 7: missing document type declaration.
27
+ 11: required tr element start tag implied by unexpected td element start tag.
28
+ 12: unexpected end of file implied table element end tag.
29
+ #document
30
+ | <html>
31
+ | <head>
32
+ | <body>
33
+ | <table>
34
+ | <tbody>
35
+ | <tr>
36
+ | <td>
37
+
38
+ #data
39
+ <table><td>test</tbody></table>
40
+ #errors
41
+ missing document type declarattion
42
+ Unexpected and of file
43
+ #document
44
+ | <html>
45
+ | <head>
46
+ | <body>
47
+ | <table>
48
+ | <tbody>
49
+ | <tr>
50
+ | <td>
51
+ | "test"
52
+
53
+ #data
54
+ <frame>test
55
+ #errors
56
+ missing document type declaration
57
+ frame element can't occur here
58
+ #document
59
+ | <html>
60
+ | <head>
61
+ | <body>
62
+ | "test"
63
+
64
+ #data
65
+ <!DOCTYPE HTML><frameset>test
66
+ #errors
67
+ frameset can't contain text
68
+ Unexpected end of file
69
+ #document
70
+ | <!DOCTYPE HTML>
71
+ | <html>
72
+ | <head>
73
+ | <frameset>
74
+
75
+ #data
76
+ <!DOCTYPE HTML><frameset><!DOCTYPE HTML>
77
+ #errors
78
+ document type declaration can only occur at the start of a document
79
+ Expected end tag </frameset>
80
+ #document
81
+ | <!DOCTYPE HTML>
82
+ | <html>
83
+ | <head>
84
+ | <frameset>
85
+
86
+ #data
87
+ <!DOCTYPE HTML><font><p><b>test</font>
88
+ #errors
89
+ AAA violation. </font>
90
+ AAA violation. </font>
91
+ #document
92
+ | <!DOCTYPE HTML>
93
+ | <html>
94
+ | <head>
95
+ | <body>
96
+ | <font>
97
+ | <p>
98
+ | <font>
99
+ | <b>
100
+ | "test"
101
+
102
+ #data
103
+ <!DOCTYPE HTML><dt><div><dd>
104
+ #errors
105
+ Missing end tag for <div>.
106
+ #document
107
+ | <!DOCTYPE HTML>
108
+ | <html>
109
+ | <head>
110
+ | <body>
111
+ | <dt>
112
+ | <div>
113
+ | <dd>
114
+
115
+ #data
116
+ <script></x
117
+ #errors
118
+ no document type
119
+ Unexpected end of file. Expected </script> end tag.
120
+ #document
121
+ | <html>
122
+ | <head>
123
+ | <script>
124
+ | "</x"
125
+ | <body>
126
+
127
+ #data
128
+ <table><plaintext><td>
129
+ #errors
130
+ no document type
131
+ <plaintext> directly inside table
132
+ Characters inside table.
133
+ Characters inside table. (XXX?)
134
+ Unexpected end of file.
135
+ #document
136
+ | <html>
137
+ | <head>
138
+ | <body>
139
+ | <plaintext>
140
+ | "<td>"
141
+ | <table>
142
+
143
+ #data
144
+ <plaintext></plaintext>
145
+ #errors
146
+ No DOCTYPE seen.
147
+ Unexpected end of file.
148
+ #document
149
+ | <html>
150
+ | <head>
151
+ | <body>
152
+ | <plaintext>
153
+ | "</plaintext>"
154
+
155
+ #data
156
+ <!DOCTYPE HTML><table><tr>TEST
157
+ #errors
158
+ TEST can't occur in <tr>
159
+ Unexpected end of file.
160
+ #document
161
+ | <!DOCTYPE HTML>
162
+ | <html>
163
+ | <head>
164
+ | <body>
165
+ | "TEST"
166
+ | <table>
167
+ | <tbody>
168
+ | <tr>
169
+
170
+ #data
171
+ <!DOCTYPE HTML><body t1=1><body t2=2><body t3=3 t4=4>
172
+ #errors
173
+ Unexpected start tag "body"
174
+ Unexpected start tag "body"
175
+ #document
176
+ | <!DOCTYPE HTML>
177
+ | <html>
178
+ | <head>
179
+ | <body>
180
+ | t1="1"
181
+ | t2="2"
182
+ | t3="3"
183
+ | t4="4"
184
+
185
+ #data
186
+ </b test
187
+ #errors
188
+ Unexpected EOF in attribute
189
+ Unexpected attribute in end tag.
190
+ No doctype.
191
+ Unexpected end tag.
192
+ #document
193
+ | <html>
194
+ | <head>
195
+ | <body>
196
+
197
+ #data
198
+ <!DOCTYPE HTML></b test<b &=&amp>X
199
+ #errors
200
+ End tag contains attributes.
201
+ Unexpected end tag.
202
+ Named entity didn't end with ;
203
+ #document
204
+ | <!DOCTYPE HTML>
205
+ | <html>
206
+ | <head>
207
+ | <body>
208
+ | "X"
209
+
210
+ #data
211
+ <!doctypehtml><scrIPt type=text/x-foobar;baz>X</SCRipt
212
+ #errors
213
+ No space after literal DOCTYPE.
214
+ Unexpected EOF in (end) tag name
215
+ #document
216
+ | <!DOCTYPE html>
217
+ | <html>
218
+ | <head>
219
+ | <script>
220
+ | type="text/x-foobar;baz"
221
+ | "X"
222
+ | <body>
223
+
224
+ #data
225
+ &
226
+ #errors
227
+ No doctype.
228
+ #document
229
+ | <html>
230
+ | <head>
231
+ | <body>
232
+ | "&"
233
+
234
+ #data
235
+ &#
236
+ #errors
237
+ No doctype.
238
+ Unfinished numeric entity.
239
+ #document
240
+ | <html>
241
+ | <head>
242
+ | <body>
243
+ | "&#"
244
+
245
+ #data
246
+ &#X
247
+ #errors
248
+ No doctype.
249
+ Unfinished hexadecimal entity.
250
+ #document
251
+ | <html>
252
+ | <head>
253
+ | <body>
254
+ | "&#X"
255
+
256
+ #data
257
+ &#x
258
+ #errors
259
+ No doctype.
260
+ Unfinished hexadecimal entity.
261
+ #document
262
+ | <html>
263
+ | <head>
264
+ | <body>
265
+ | "&#x"
266
+
267
+ #data
268
+ &#45
269
+ #errors
270
+ No doctype.
271
+ Numeric entity didn't end with ;
272
+ #document
273
+ | <html>
274
+ | <head>
275
+ | <body>
276
+ | "-"
277
+
278
+ #data
279
+ &x-test
280
+ #errors
281
+ No doctype.
282
+ Unfinished named entity.
283
+ #document
284
+ | <html>
285
+ | <head>
286
+ | <body>
287
+ | "&x-test"
288
+
289
+ #data
290
+ <!doctypehtml><p><li>
291
+ #errors
292
+ No space after literal DOCTYPE.
293
+ #document
294
+ | <!DOCTYPE html>
295
+ | <html>
296
+ | <head>
297
+ | <body>
298
+ | <p>
299
+ | <li>
300
+
301
+ #data
302
+ <!doctypeHTML><p><dt>
303
+ #errors
304
+ No space after literal DOCTYPE.
305
+ #document
306
+ | <!DOCTYPE HTML>
307
+ | <html>
308
+ | <head>
309
+ | <body>
310
+ | <p>
311
+ | <dt>
312
+
313
+ #data
314
+ <!doctypehtmL><p><dd>
315
+ #errors
316
+ No space after literal DOCTYPE.
317
+ #document
318
+ | <!DOCTYPE htmL>
319
+ | <html>
320
+ | <head>
321
+ | <body>
322
+ | <p>
323
+ | <dd>
324
+
325
+ #data
326
+ <!doctypehtml><p><form>
327
+ #errors
328
+ No space after literal DOCTYPE.
329
+ Unexpected EOF.
330
+ #document
331
+ | <!DOCTYPE html>
332
+ | <html>
333
+ | <head>
334
+ | <body>
335
+ | <p>
336
+ | <form>
337
+
338
+ #data
339
+ <!DOCTYPE HTML><p><b><i><u></p> <p>X
340
+ #errors
341
+ Unexpected end tag </p>.
342
+ Unexpected end EOF. Missing closing tags.
343
+ #document
344
+ | <!DOCTYPE HTML>
345
+ | <html>
346
+ | <head>
347
+ | <body>
348
+ | <p>
349
+ | <b>
350
+ | <i>
351
+ | <u>
352
+ | <b>
353
+ | <i>
354
+ | <u>
355
+ | " "
356
+ | <p>
357
+ | "X"
358
+
359
+ #data
360
+ <!DOCTYPE HTML><p></P>X
361
+ #errors
362
+ #document
363
+ | <!DOCTYPE HTML>
364
+ | <html>
365
+ | <head>
366
+ | <body>
367
+ | <p>
368
+ | "X"
369
+
370
+ #data
371
+ &AMP
372
+ #errors
373
+ No doctype.
374
+ No closing ; for the entity.
375
+ #document
376
+ | <html>
377
+ | <head>
378
+ | <body>
379
+ | "&"
380
+
381
+ #data
382
+ &AMp;
383
+ #errors
384
+ No doctype.
385
+ Invalid entity.
386
+ #document
387
+ | <html>
388
+ | <head>
389
+ | <body>
390
+ | "&AMp;"
391
+
392
+ #data
393
+ <!DOCTYPE HTML><html><head></head><body><thisISasillyTESTelementNameToMakeSureCrazyTagNamesArePARSEDcorrectLY>
394
+ #errors
395
+ Unexpected end of file.
396
+ #document
397
+ | <!DOCTYPE HTML>
398
+ | <html>
399
+ | <head>
400
+ | <body>
401
+ | <thisisasillytestelementnametomakesurecrazytagnamesareparsedcorrectly>
402
+
403
+ #data
404
+ <!DOCTYPE HTML>X</body>X
405
+ #errors
406
+ Unexpected non-space characters in the after body phase.
407
+ #document
408
+ | <!DOCTYPE HTML>
409
+ | <html>
410
+ | <head>
411
+ | <body>
412
+ | "XX"
413
+
414
+ #data
415
+ <!DOCTYPE HTML><!-- X
416
+ #errors
417
+ Unexpected end of file in comment.
418
+ #document
419
+ | <!DOCTYPE HTML>
420
+ | <!-- X -->
421
+ | <html>
422
+ | <head>
423
+ | <body>
424
+
425
+ #data
426
+ <!DOCTYPE HTML><table><caption>test TEST</caption><td>test
427
+ #errors
428
+ Unexpected <td> in table body phase.
429
+ Unexpected end of file.
430
+ #document
431
+ | <!DOCTYPE HTML>
432
+ | <html>
433
+ | <head>
434
+ | <body>
435
+ | <table>
436
+ | <caption>
437
+ | "test TEST"
438
+ | <tbody>
439
+ | <tr>
440
+ | <td>
441
+ | "test"
442
+
443
+ #data
444
+ <!DOCTYPE HTML><select><option><optgroup>
445
+ #errors
446
+ Unexpected end of file. Missing closing tags.
447
+ #document
448
+ | <!DOCTYPE HTML>
449
+ | <html>
450
+ | <head>
451
+ | <body>
452
+ | <select>
453
+ | <option>
454
+ | <optgroup>
455
+
456
+ #data
457
+ <!DOCTYPE HTML><select><optgroup><option></optgroup><option><select><option>
458
+ #errors
459
+ Unexpected start tag <select> in <select>.
460
+ Unexpected start tag <option>.
461
+ #document
462
+ | <!DOCTYPE HTML>
463
+ | <html>
464
+ | <head>
465
+ | <body>
466
+ | <select>
467
+ | <optgroup>
468
+ | <option>
469
+ | <option>
470
+
471
+ #data
472
+ <!DOCTYPE HTML><select><optgroup><option><optgroup>
473
+ #errors
474
+ Unexpected end of file. Missing closing tags.
475
+ #document
476
+ | <!DOCTYPE HTML>
477
+ | <html>
478
+ | <head>
479
+ | <body>
480
+ | <select>
481
+ | <optgroup>
482
+ | <option>
483
+ | <optgroup>
484
+
485
+ #data
486
+ <!DOCTYPE HTML><font><input><input></font>
487
+ #errors
488
+ #document
489
+ | <!DOCTYPE HTML>
490
+ | <html>
491
+ | <head>
492
+ | <body>
493
+ | <font>
494
+ | <input>
495
+ | <input>
496
+
497
+ #data
498
+ <!DOCTYPE HTML><!-- XXX - XXX -->
499
+ #errors
500
+ #document
501
+ | <!DOCTYPE HTML>
502
+ | <!-- XXX - XXX -->
503
+ | <html>
504
+ | <head>
505
+ | <body>
506
+
507
+ #data
508
+ <!DOCTYPE HTML><!-- XXX - XXX
509
+ #errors
510
+ Unexpected EOF in comment.
511
+ #document
512
+ | <!DOCTYPE HTML>
513
+ | <!-- XXX - XXX -->
514
+ | <html>
515
+ | <head>
516
+ | <body>
517
+
518
+ #data
519
+ <!DOCTYPE HTML><!-- XXX - XXX - XXX -->
520
+ #errors
521
+ #document
522
+ | <!DOCTYPE HTML>
523
+ | <!-- XXX - XXX - XXX -->
524
+ | <html>
525
+ | <head>
526
+ | <body>
527
+
528
+ #data
529
+ <isindex test=x name=x>
530
+ #errors
531
+ No doctype
532
+ <isindex> is not ok!
533
+ #document
534
+ | <html>
535
+ | <head>
536
+ | <body>
537
+ | <form>
538
+ | <hr>
539
+ | <p>
540
+ | <label>
541
+ | "This is a searchable index. Insert your search keywords here: "
542
+ | <input>
543
+ | name="isindex"
544
+ | test="x"
545
+ | <hr>
546
+
547
+ #data
548
+ test
549
+ test
550
+ #errors
551
+ No doctype
552
+ #document
553
+ | <html>
554
+ | <head>
555
+ | <body>
556
+ | "test
557
+ test"
558
+
559
+ #data
560
+ <p><b><i><u></p>
561
+ <p>X
562
+ #errors
563
+ No doctype
564
+ Unexpected end tag p.
565
+ Unexpected EOF.
566
+ #document
567
+ | <html>
568
+ | <head>
569
+ | <body>
570
+ | <p>
571
+ | <b>
572
+ | <i>
573
+ | <u>
574
+ | <b>
575
+ | <i>
576
+ | <u>
577
+ | "
578
+ "
579
+ | <p>
580
+ | "X"
581
+
582
+ #data
583
+ <!DOCTYPE HTML><body><title>test</body></title>
584
+ #errors
585
+ Unexpected start tag that belongs in the head.
586
+ #document
587
+ | <!DOCTYPE HTML>
588
+ | <html>
589
+ | <head>
590
+ | <title>
591
+ | "test</body>"
592
+ | <body>
593
+
594
+ #data
595
+ <!DOCTYPE HTML><body><title>X</title><meta name=z><link rel=foo><style>
596
+ x { content:"</style" } </style>
597
+ #errors
598
+ Unexpected start tag that belongs in head. <title>
599
+ #document
600
+ | <!DOCTYPE HTML>
601
+ | <html>
602
+ | <head>
603
+ | <title>
604
+ | "X"
605
+ | <body>
606
+ | <meta>
607
+ | name="z"
608
+ | <link>
609
+ | rel="foo"
610
+ | <style>
611
+ | "
612
+ x { content:"</style" } "
613
+
614
+ #data
615
+ <!DOCTYPE HTML><select><optgroup></optgroup></select>
616
+ #errors
617
+ #document
618
+ | <!DOCTYPE HTML>
619
+ | <html>
620
+ | <head>
621
+ | <body>
622
+ | <select>
623
+ | <optgroup>
624
+
625
+ #data
626
+
627
+
628
+ #errors
629
+ No doctype.
630
+ #document
631
+ | <html>
632
+ | <head>
633
+ | <body>
634
+
635
+ #data
636
+ <!DOCTYPE HTML> <html>
637
+ #errors
638
+ #document
639
+ | <!DOCTYPE HTML>
640
+ | <html>
641
+ | <head>
642
+ | <body>
643
+
644
+ #data
645
+ <!DOCTYPE HTML><script>
646
+ </script> <title>x</title> </head>
647
+ #errors
648
+ #document
649
+ | <!DOCTYPE HTML>
650
+ | <html>
651
+ | <head>
652
+ | <script>
653
+ | "
654
+ "
655
+ | " "
656
+ | <title>
657
+ | "x"
658
+ | " "
659
+ | <body>
660
+
661
+ #data
662
+ <!DOCTYPE HTML><html><body><html id=x>
663
+ #errors
664
+ duplicate html start tag
665
+ #document
666
+ | <!DOCTYPE HTML>
667
+ | <html>
668
+ | id="x"
669
+ | <head>
670
+ | <body>
671
+
672
+ #data
673
+ <!DOCTYPE HTML>X</body><html id="x">
674
+ #errors
675
+ Unexpected html start tag in the after body phase.
676
+ html needs to be the first start tag.
677
+ #document
678
+ | <!DOCTYPE HTML>
679
+ | <html>
680
+ | id="x"
681
+ | <head>
682
+ | <body>
683
+ | "X"
684
+
685
+ #data
686
+ <!DOCTYPE HTML><head><html id=x>
687
+ #errors
688
+ html start tag too late
689
+ #document
690
+ | <!DOCTYPE HTML>
691
+ | <html>
692
+ | id="x"
693
+ | <head>
694
+ | <body>
695
+
696
+ #data
697
+ <!DOCTYPE HTML>X</html>X
698
+ #errors
699
+ Unexpected non-space characters. Expected end of file.
700
+ Unexpected non-space characters in after body phase. Expected end of file.
701
+ #document
702
+ | <!DOCTYPE HTML>
703
+ | <html>
704
+ | <head>
705
+ | <body>
706
+ | "XX"
707
+
708
+ #data
709
+ <!DOCTYPE HTML>X</html>
710
+ #errors
711
+ #document
712
+ | <!DOCTYPE HTML>
713
+ | <html>
714
+ | <head>
715
+ | <body>
716
+ | "X "
717
+
718
+ #data
719
+ <!DOCTYPE HTML>X</html><p>X
720
+ #errors
721
+ Unexpected start tag <p> in trailing end phase.
722
+ Unexpected start tag <p> in after body phase.
723
+ #document
724
+ | <!DOCTYPE HTML>
725
+ | <html>
726
+ | <head>
727
+ | <body>
728
+ | "X"
729
+ | <p>
730
+ | "X"
731
+
732
+ #data
733
+ <!DOCTYPE HTML>X<p/x/y/z>
734
+ #errors
735
+ Solidus (/) incorrectly placed.
736
+ Solidus (/) incorrectly placed.
737
+ Solidus (/) incorrectly placed.
738
+ #document
739
+ | <!DOCTYPE HTML>
740
+ | <html>
741
+ | <head>
742
+ | <body>
743
+ | "X"
744
+ | <p>
745
+ | x=""
746
+ | y=""
747
+ | z=""
748
+
749
+ #data
750
+ <!DOCTYPE HTML><!--x--
751
+ #errors
752
+ Unexpected end of file in comment.
753
+ #document
754
+ | <!DOCTYPE HTML>
755
+ | <!-- x -->
756
+ | <html>
757
+ | <head>
758
+ | <body>
759
+
760
+ #data
761
+ <!DOCTYPE HTML><table><tr><td></p></table>
762
+ #errors
763
+ Unexpected </p> end tag.
764
+ #document
765
+ | <!DOCTYPE HTML>
766
+ | <html>
767
+ | <head>
768
+ | <body>
769
+ | <table>
770
+ | <tbody>
771
+ | <tr>
772
+ | <td>
773
+ | <p>