rfeedparser 0.9.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3332) hide show
  1. data/LICENSE +68 -0
  2. data/README +28 -0
  3. data/RUBY-TESTING +60 -0
  4. data/lib/feedparser.rb +3671 -0
  5. data/tests/feedparserserver.rb +115 -0
  6. data/tests/feedparsertest.rb +196 -0
  7. data/tests/illformed/amp/amp01.xml +9 -0
  8. data/tests/illformed/amp/amp02.xml +9 -0
  9. data/tests/illformed/amp/amp03.xml +9 -0
  10. data/tests/illformed/amp/amp04.xml +9 -0
  11. data/tests/illformed/amp/amp05.xml +9 -0
  12. data/tests/illformed/amp/amp06.xml +9 -0
  13. data/tests/illformed/amp/amp07.xml +9 -0
  14. data/tests/illformed/amp/amp08.xml +9 -0
  15. data/tests/illformed/amp/amp09.xml +9 -0
  16. data/tests/illformed/amp/amp10.xml +9 -0
  17. data/tests/illformed/amp/amp11.xml +9 -0
  18. data/tests/illformed/amp/amp12.xml +9 -0
  19. data/tests/illformed/amp/amp13.xml +9 -0
  20. data/tests/illformed/amp/amp14.xml +9 -0
  21. data/tests/illformed/amp/amp15.xml +9 -0
  22. data/tests/illformed/amp/amp16.xml +9 -0
  23. data/tests/illformed/amp/amp17.xml +9 -0
  24. data/tests/illformed/amp/amp18.xml +9 -0
  25. data/tests/illformed/amp/amp19.xml +9 -0
  26. data/tests/illformed/amp/amp20.xml +9 -0
  27. data/tests/illformed/amp/amp21.xml +9 -0
  28. data/tests/illformed/amp/amp22.xml +9 -0
  29. data/tests/illformed/amp/amp23.xml +9 -0
  30. data/tests/illformed/amp/amp24.xml +9 -0
  31. data/tests/illformed/amp/amp25.xml +9 -0
  32. data/tests/illformed/amp/amp26.xml +9 -0
  33. data/tests/illformed/amp/amp27.xml +9 -0
  34. data/tests/illformed/amp/amp28.xml +9 -0
  35. data/tests/illformed/amp/amp29.xml +9 -0
  36. data/tests/illformed/amp/amp30.xml +9 -0
  37. data/tests/illformed/amp/amp31.xml +9 -0
  38. data/tests/illformed/amp/amp32.xml +9 -0
  39. data/tests/illformed/amp/amp33.xml +9 -0
  40. data/tests/illformed/amp/amp34.xml +9 -0
  41. data/tests/illformed/amp/amp35.xml +9 -0
  42. data/tests/illformed/amp/amp36.xml +9 -0
  43. data/tests/illformed/amp/amp37.xml +9 -0
  44. data/tests/illformed/amp/amp38.xml +9 -0
  45. data/tests/illformed/amp/amp39.xml +9 -0
  46. data/tests/illformed/amp/amp40.xml +9 -0
  47. data/tests/illformed/amp/amp41.xml +9 -0
  48. data/tests/illformed/amp/amp42.xml +9 -0
  49. data/tests/illformed/amp/amp43.xml +9 -0
  50. data/tests/illformed/amp/amp44.xml +9 -0
  51. data/tests/illformed/amp/amp45.xml +9 -0
  52. data/tests/illformed/amp/amp46.xml +9 -0
  53. data/tests/illformed/amp/amp47.xml +9 -0
  54. data/tests/illformed/amp/amp48.xml +9 -0
  55. data/tests/illformed/amp/amp49.xml +9 -0
  56. data/tests/illformed/amp/amp50.xml +9 -0
  57. data/tests/illformed/amp/amp51.xml +9 -0
  58. data/tests/illformed/amp/amp52.xml +9 -0
  59. data/tests/illformed/amp/amp53.xml +9 -0
  60. data/tests/illformed/amp/amp54.xml +9 -0
  61. data/tests/illformed/amp/amp55.xml +9 -0
  62. data/tests/illformed/amp/amp56.xml +9 -0
  63. data/tests/illformed/amp/amp57.xml +9 -0
  64. data/tests/illformed/amp/amp58.xml +9 -0
  65. data/tests/illformed/amp/amp59.xml +9 -0
  66. data/tests/illformed/amp/amp60.xml +9 -0
  67. data/tests/illformed/amp/amp61.xml +9 -0
  68. data/tests/illformed/amp/amp62.xml +9 -0
  69. data/tests/illformed/amp/amp63.xml +9 -0
  70. data/tests/illformed/amp/amp64.xml +9 -0
  71. data/tests/illformed/atom/atom_namespace_1.xml +7 -0
  72. data/tests/illformed/atom/atom_namespace_2.xml +7 -0
  73. data/tests/illformed/atom/atom_namespace_3.xml +7 -0
  74. data/tests/illformed/atom/atom_namespace_4.xml +7 -0
  75. data/tests/illformed/atom/atom_namespace_5.xml +7 -0
  76. data/tests/illformed/atom/entry_author_email.xml +13 -0
  77. data/tests/illformed/atom/entry_author_homepage.xml +13 -0
  78. data/tests/illformed/atom/entry_author_map_author.xml +13 -0
  79. data/tests/illformed/atom/entry_author_map_author_2.xml +12 -0
  80. data/tests/illformed/atom/entry_author_name.xml +13 -0
  81. data/tests/illformed/atom/entry_author_uri.xml +13 -0
  82. data/tests/illformed/atom/entry_author_url.xml +13 -0
  83. data/tests/illformed/atom/entry_content_mode_base64.xml +11 -0
  84. data/tests/illformed/atom/entry_content_mode_escaped.xml +9 -0
  85. data/tests/illformed/atom/entry_content_type.xml +9 -0
  86. data/tests/illformed/atom/entry_content_type_text_plain.xml +9 -0
  87. data/tests/illformed/atom/entry_content_value.xml +9 -0
  88. data/tests/illformed/atom/entry_contributor_email.xml +13 -0
  89. data/tests/illformed/atom/entry_contributor_homepage.xml +13 -0
  90. data/tests/illformed/atom/entry_contributor_multiple.xml +18 -0
  91. data/tests/illformed/atom/entry_contributor_name.xml +13 -0
  92. data/tests/illformed/atom/entry_contributor_uri.xml +13 -0
  93. data/tests/illformed/atom/entry_contributor_url.xml +13 -0
  94. data/tests/illformed/atom/entry_id.xml +9 -0
  95. data/tests/illformed/atom/entry_id_map_guid.xml +9 -0
  96. data/tests/illformed/atom/entry_link_alternate_map_link.xml +9 -0
  97. data/tests/illformed/atom/entry_link_alternate_map_link_2.xml +9 -0
  98. data/tests/illformed/atom/entry_link_href.xml +9 -0
  99. data/tests/illformed/atom/entry_link_multiple.xml +10 -0
  100. data/tests/illformed/atom/entry_link_rel.xml +9 -0
  101. data/tests/illformed/atom/entry_link_title.xml +9 -0
  102. data/tests/illformed/atom/entry_link_type.xml +9 -0
  103. data/tests/illformed/atom/entry_summary.xml +9 -0
  104. data/tests/illformed/atom/entry_summary_base64.xml +11 -0
  105. data/tests/illformed/atom/entry_summary_base64_2.xml +11 -0
  106. data/tests/illformed/atom/entry_summary_content_mode_base64.xml +11 -0
  107. data/tests/illformed/atom/entry_summary_content_mode_escaped.xml +9 -0
  108. data/tests/illformed/atom/entry_summary_content_type.xml +9 -0
  109. data/tests/illformed/atom/entry_summary_content_type_text_plain.xml +9 -0
  110. data/tests/illformed/atom/entry_summary_content_value.xml +9 -0
  111. data/tests/illformed/atom/entry_summary_escaped_markup.xml +9 -0
  112. data/tests/illformed/atom/entry_summary_inline_markup.xml +9 -0
  113. data/tests/illformed/atom/entry_summary_inline_markup_2.xml +9 -0
  114. data/tests/illformed/atom/entry_summary_naked_markup.xml +9 -0
  115. data/tests/illformed/atom/entry_summary_text_plain.xml +9 -0
  116. data/tests/illformed/atom/entry_title.xml +9 -0
  117. data/tests/illformed/atom/entry_title_base64.xml +11 -0
  118. data/tests/illformed/atom/entry_title_base64_2.xml +11 -0
  119. data/tests/illformed/atom/entry_title_content_mode_base64.xml +11 -0
  120. data/tests/illformed/atom/entry_title_content_mode_escaped.xml +9 -0
  121. data/tests/illformed/atom/entry_title_content_type.xml +9 -0
  122. data/tests/illformed/atom/entry_title_content_type_text_plain.xml +9 -0
  123. data/tests/illformed/atom/entry_title_content_value.xml +9 -0
  124. data/tests/illformed/atom/entry_title_escaped_markup.xml +9 -0
  125. data/tests/illformed/atom/entry_title_inline_markup.xml +9 -0
  126. data/tests/illformed/atom/entry_title_inline_markup_2.xml +9 -0
  127. data/tests/illformed/atom/entry_title_naked_markup.xml +9 -0
  128. data/tests/illformed/atom/entry_title_text_plain.xml +9 -0
  129. data/tests/illformed/atom/entry_title_text_plain_brackets.xml +9 -0
  130. data/tests/illformed/atom/feed_author_email.xml +11 -0
  131. data/tests/illformed/atom/feed_author_homepage.xml +11 -0
  132. data/tests/illformed/atom/feed_author_map_author.xml +11 -0
  133. data/tests/illformed/atom/feed_author_map_author_2.xml +10 -0
  134. data/tests/illformed/atom/feed_author_name.xml +11 -0
  135. data/tests/illformed/atom/feed_author_uri.xml +11 -0
  136. data/tests/illformed/atom/feed_author_url.xml +11 -0
  137. data/tests/illformed/atom/feed_contributor_email.xml +11 -0
  138. data/tests/illformed/atom/feed_contributor_homepage.xml +11 -0
  139. data/tests/illformed/atom/feed_contributor_multiple.xml +16 -0
  140. data/tests/illformed/atom/feed_contributor_name.xml +11 -0
  141. data/tests/illformed/atom/feed_contributor_uri.xml +11 -0
  142. data/tests/illformed/atom/feed_contributor_url.xml +11 -0
  143. data/tests/illformed/atom/feed_copyright.xml +7 -0
  144. data/tests/illformed/atom/feed_copyright_base64.xml +9 -0
  145. data/tests/illformed/atom/feed_copyright_base64_2.xml +9 -0
  146. data/tests/illformed/atom/feed_copyright_content_mode_base64.xml +9 -0
  147. data/tests/illformed/atom/feed_copyright_content_mode_escaped.xml +7 -0
  148. data/tests/illformed/atom/feed_copyright_content_type.xml +7 -0
  149. data/tests/illformed/atom/feed_copyright_content_type_text_plain.xml +7 -0
  150. data/tests/illformed/atom/feed_copyright_content_value.xml +7 -0
  151. data/tests/illformed/atom/feed_copyright_escaped_markup.xml +7 -0
  152. data/tests/illformed/atom/feed_copyright_inline_markup.xml +7 -0
  153. data/tests/illformed/atom/feed_copyright_inline_markup_2.xml +7 -0
  154. data/tests/illformed/atom/feed_copyright_naked_markup.xml +7 -0
  155. data/tests/illformed/atom/feed_copyright_text_plain.xml +7 -0
  156. data/tests/illformed/atom/feed_generator.xml +7 -0
  157. data/tests/illformed/atom/feed_generator_name.xml +7 -0
  158. data/tests/illformed/atom/feed_generator_url.xml +7 -0
  159. data/tests/illformed/atom/feed_generator_version.xml +7 -0
  160. data/tests/illformed/atom/feed_id.xml +7 -0
  161. data/tests/illformed/atom/feed_id_map_guid.xml +7 -0
  162. data/tests/illformed/atom/feed_info.xml +7 -0
  163. data/tests/illformed/atom/feed_info_base64.xml +9 -0
  164. data/tests/illformed/atom/feed_info_base64_2.xml +9 -0
  165. data/tests/illformed/atom/feed_info_content_mode_base64.xml +9 -0
  166. data/tests/illformed/atom/feed_info_content_mode_escaped.xml +7 -0
  167. data/tests/illformed/atom/feed_info_content_type.xml +7 -0
  168. data/tests/illformed/atom/feed_info_content_type_text_plain.xml +7 -0
  169. data/tests/illformed/atom/feed_info_content_value.xml +7 -0
  170. data/tests/illformed/atom/feed_info_escaped_markup.xml +7 -0
  171. data/tests/illformed/atom/feed_info_inline_markup.xml +7 -0
  172. data/tests/illformed/atom/feed_info_inline_markup_2.xml +7 -0
  173. data/tests/illformed/atom/feed_info_naked_markup.xml +7 -0
  174. data/tests/illformed/atom/feed_info_text_plain.xml +7 -0
  175. data/tests/illformed/atom/feed_link_alternate_map_link.xml +7 -0
  176. data/tests/illformed/atom/feed_link_alternate_map_link_2.xml +7 -0
  177. data/tests/illformed/atom/feed_link_href.xml +7 -0
  178. data/tests/illformed/atom/feed_link_multiple.xml +8 -0
  179. data/tests/illformed/atom/feed_link_rel.xml +7 -0
  180. data/tests/illformed/atom/feed_link_title.xml +7 -0
  181. data/tests/illformed/atom/feed_link_type.xml +7 -0
  182. data/tests/illformed/atom/feed_tagline.xml +7 -0
  183. data/tests/illformed/atom/feed_tagline_base64.xml +9 -0
  184. data/tests/illformed/atom/feed_tagline_base64_2.xml +9 -0
  185. data/tests/illformed/atom/feed_tagline_content_mode_base64.xml +9 -0
  186. data/tests/illformed/atom/feed_tagline_content_mode_escaped.xml +7 -0
  187. data/tests/illformed/atom/feed_tagline_content_type.xml +7 -0
  188. data/tests/illformed/atom/feed_tagline_content_type_text_plain.xml +7 -0
  189. data/tests/illformed/atom/feed_tagline_content_value.xml +7 -0
  190. data/tests/illformed/atom/feed_tagline_escaped_markup.xml +7 -0
  191. data/tests/illformed/atom/feed_tagline_inline_markup.xml +7 -0
  192. data/tests/illformed/atom/feed_tagline_inline_markup_2.xml +7 -0
  193. data/tests/illformed/atom/feed_tagline_naked_markup.xml +7 -0
  194. data/tests/illformed/atom/feed_tagline_text_plain.xml +7 -0
  195. data/tests/illformed/atom/feed_title.xml +7 -0
  196. data/tests/illformed/atom/feed_title_base64.xml +9 -0
  197. data/tests/illformed/atom/feed_title_base64_2.xml +9 -0
  198. data/tests/illformed/atom/feed_title_content_mode_base64.xml +9 -0
  199. data/tests/illformed/atom/feed_title_content_mode_escaped.xml +7 -0
  200. data/tests/illformed/atom/feed_title_content_type.xml +7 -0
  201. data/tests/illformed/atom/feed_title_content_type_text_plain.xml +7 -0
  202. data/tests/illformed/atom/feed_title_content_value.xml +7 -0
  203. data/tests/illformed/atom/feed_title_escaped_markup.xml +7 -0
  204. data/tests/illformed/atom/feed_title_inline_markup.xml +7 -0
  205. data/tests/illformed/atom/feed_title_inline_markup_2.xml +7 -0
  206. data/tests/illformed/atom/feed_title_naked_markup.xml +7 -0
  207. data/tests/illformed/atom/feed_title_text_plain.xml +7 -0
  208. data/tests/illformed/atom/relative_uri.xml +7 -0
  209. data/tests/illformed/atom/relative_uri_inherit.xml +7 -0
  210. data/tests/illformed/atom/relative_uri_inherit_2.xml +7 -0
  211. data/tests/illformed/atom10/atom10_namespace.xml +7 -0
  212. data/tests/illformed/atom10/atom10_version.xml +6 -0
  213. data/tests/illformed/atom10/entry_author_email.xml +13 -0
  214. data/tests/illformed/atom10/entry_author_map_author.xml +13 -0
  215. data/tests/illformed/atom10/entry_author_map_author_2.xml +12 -0
  216. data/tests/illformed/atom10/entry_author_name.xml +13 -0
  217. data/tests/illformed/atom10/entry_author_uri.xml +13 -0
  218. data/tests/illformed/atom10/entry_author_url.xml +13 -0
  219. data/tests/illformed/atom10/entry_category_label.xml +9 -0
  220. data/tests/illformed/atom10/entry_category_scheme.xml +9 -0
  221. data/tests/illformed/atom10/entry_category_term.xml +9 -0
  222. data/tests/illformed/atom10/entry_content_application_xml.xml +9 -0
  223. data/tests/illformed/atom10/entry_content_base64.xml +11 -0
  224. data/tests/illformed/atom10/entry_content_base64_2.xml +11 -0
  225. data/tests/illformed/atom10/entry_content_escaped_markup.xml +9 -0
  226. data/tests/illformed/atom10/entry_content_inline_markup.xml +9 -0
  227. data/tests/illformed/atom10/entry_content_inline_markup_2.xml +9 -0
  228. data/tests/illformed/atom10/entry_content_src.xml +9 -0
  229. data/tests/illformed/atom10/entry_content_text_plain.xml +9 -0
  230. data/tests/illformed/atom10/entry_content_text_plain_brackets.xml +9 -0
  231. data/tests/illformed/atom10/entry_content_type.xml +9 -0
  232. data/tests/illformed/atom10/entry_content_type_text.xml +9 -0
  233. data/tests/illformed/atom10/entry_content_value.xml +9 -0
  234. data/tests/illformed/atom10/entry_contributor_email.xml +13 -0
  235. data/tests/illformed/atom10/entry_contributor_multiple.xml +18 -0
  236. data/tests/illformed/atom10/entry_contributor_name.xml +13 -0
  237. data/tests/illformed/atom10/entry_contributor_uri.xml +13 -0
  238. data/tests/illformed/atom10/entry_contributor_url.xml +13 -0
  239. data/tests/illformed/atom10/entry_id.xml +9 -0
  240. data/tests/illformed/atom10/entry_id_map_guid.xml +9 -0
  241. data/tests/illformed/atom10/entry_id_no_normalization_1.xml +9 -0
  242. data/tests/illformed/atom10/entry_id_no_normalization_2.xml +9 -0
  243. data/tests/illformed/atom10/entry_id_no_normalization_3.xml +9 -0
  244. data/tests/illformed/atom10/entry_id_no_normalization_4.xml +9 -0
  245. data/tests/illformed/atom10/entry_id_no_normalization_5.xml +9 -0
  246. data/tests/illformed/atom10/entry_id_no_normalization_6.xml +9 -0
  247. data/tests/illformed/atom10/entry_id_no_normalization_7.xml +9 -0
  248. data/tests/illformed/atom10/entry_link_alternate_map_link.xml +9 -0
  249. data/tests/illformed/atom10/entry_link_alternate_map_link_2.xml +9 -0
  250. data/tests/illformed/atom10/entry_link_alternate_map_link_3.xml +11 -0
  251. data/tests/illformed/atom10/entry_link_href.xml +9 -0
  252. data/tests/illformed/atom10/entry_link_hreflang.xml +9 -0
  253. data/tests/illformed/atom10/entry_link_length.xml +9 -0
  254. data/tests/illformed/atom10/entry_link_multiple.xml +10 -0
  255. data/tests/illformed/atom10/entry_link_no_rel.xml +9 -0
  256. data/tests/illformed/atom10/entry_link_rel.xml +9 -0
  257. data/tests/illformed/atom10/entry_link_rel_enclosure.xml +9 -0
  258. data/tests/illformed/atom10/entry_link_rel_enclosure_map_enclosure_length.xml +9 -0
  259. data/tests/illformed/atom10/entry_link_rel_enclosure_map_enclosure_type.xml +9 -0
  260. data/tests/illformed/atom10/entry_link_rel_enclosure_map_enclosure_url.xml +9 -0
  261. data/tests/illformed/atom10/entry_link_rel_other.xml +9 -0
  262. data/tests/illformed/atom10/entry_link_rel_related.xml +9 -0
  263. data/tests/illformed/atom10/entry_link_rel_self.xml +9 -0
  264. data/tests/illformed/atom10/entry_link_rel_via.xml +9 -0
  265. data/tests/illformed/atom10/entry_link_title.xml +9 -0
  266. data/tests/illformed/atom10/entry_link_type.xml +9 -0
  267. data/tests/illformed/atom10/entry_rights.xml +9 -0
  268. data/tests/illformed/atom10/entry_rights_content_value.xml +9 -0
  269. data/tests/illformed/atom10/entry_rights_escaped_markup.xml +9 -0
  270. data/tests/illformed/atom10/entry_rights_inline_markup.xml +9 -0
  271. data/tests/illformed/atom10/entry_rights_inline_markup_2.xml +9 -0
  272. data/tests/illformed/atom10/entry_rights_text_plain.xml +9 -0
  273. data/tests/illformed/atom10/entry_rights_text_plain_brackets.xml +9 -0
  274. data/tests/illformed/atom10/entry_rights_type_default.xml +9 -0
  275. data/tests/illformed/atom10/entry_rights_type_text.xml +9 -0
  276. data/tests/illformed/atom10/entry_source_author_email.xml +15 -0
  277. data/tests/illformed/atom10/entry_source_author_map_author.xml +15 -0
  278. data/tests/illformed/atom10/entry_source_author_map_author_2.xml +14 -0
  279. data/tests/illformed/atom10/entry_source_author_name.xml +15 -0
  280. data/tests/illformed/atom10/entry_source_author_uri.xml +15 -0
  281. data/tests/illformed/atom10/entry_source_category_label.xml +11 -0
  282. data/tests/illformed/atom10/entry_source_category_scheme.xml +11 -0
  283. data/tests/illformed/atom10/entry_source_category_term.xml +11 -0
  284. data/tests/illformed/atom10/entry_source_contributor_email.xml +15 -0
  285. data/tests/illformed/atom10/entry_source_contributor_multiple.xml +20 -0
  286. data/tests/illformed/atom10/entry_source_contributor_name.xml +15 -0
  287. data/tests/illformed/atom10/entry_source_contributor_uri.xml +15 -0
  288. data/tests/illformed/atom10/entry_source_generator.xml +11 -0
  289. data/tests/illformed/atom10/entry_source_generator_name.xml +11 -0
  290. data/tests/illformed/atom10/entry_source_generator_uri.xml +11 -0
  291. data/tests/illformed/atom10/entry_source_generator_version.xml +11 -0
  292. data/tests/illformed/atom10/entry_source_icon.xml +11 -0
  293. data/tests/illformed/atom10/entry_source_id.xml +11 -0
  294. data/tests/illformed/atom10/entry_source_link_alternate_map_link.xml +11 -0
  295. data/tests/illformed/atom10/entry_source_link_alternate_map_link_2.xml +11 -0
  296. data/tests/illformed/atom10/entry_source_link_href.xml +11 -0
  297. data/tests/illformed/atom10/entry_source_link_hreflang.xml +11 -0
  298. data/tests/illformed/atom10/entry_source_link_length.xml +11 -0
  299. data/tests/illformed/atom10/entry_source_link_multiple.xml +12 -0
  300. data/tests/illformed/atom10/entry_source_link_no_rel.xml +11 -0
  301. data/tests/illformed/atom10/entry_source_link_rel.xml +11 -0
  302. data/tests/illformed/atom10/entry_source_link_rel_other.xml +11 -0
  303. data/tests/illformed/atom10/entry_source_link_rel_related.xml +11 -0
  304. data/tests/illformed/atom10/entry_source_link_rel_self.xml +11 -0
  305. data/tests/illformed/atom10/entry_source_link_rel_via.xml +11 -0
  306. data/tests/illformed/atom10/entry_source_link_title.xml +11 -0
  307. data/tests/illformed/atom10/entry_source_link_type.xml +11 -0
  308. data/tests/illformed/atom10/entry_source_logo.xml +11 -0
  309. data/tests/illformed/atom10/entry_source_rights.xml +11 -0
  310. data/tests/illformed/atom10/entry_source_rights_base64.xml +13 -0
  311. data/tests/illformed/atom10/entry_source_rights_base64_2.xml +13 -0
  312. data/tests/illformed/atom10/entry_source_rights_content_type.xml +11 -0
  313. data/tests/illformed/atom10/entry_source_rights_content_type_text.xml +11 -0
  314. data/tests/illformed/atom10/entry_source_rights_content_value.xml +11 -0
  315. data/tests/illformed/atom10/entry_source_rights_escaped_markup.xml +11 -0
  316. data/tests/illformed/atom10/entry_source_rights_inline_markup.xml +11 -0
  317. data/tests/illformed/atom10/entry_source_rights_inline_markup_2.xml +11 -0
  318. data/tests/illformed/atom10/entry_source_rights_text_plain.xml +11 -0
  319. data/tests/illformed/atom10/entry_source_subittle_content_type_text.xml +11 -0
  320. data/tests/illformed/atom10/entry_source_subtitle.xml +11 -0
  321. data/tests/illformed/atom10/entry_source_subtitle_base64.xml +13 -0
  322. data/tests/illformed/atom10/entry_source_subtitle_base64_2.xml +13 -0
  323. data/tests/illformed/atom10/entry_source_subtitle_content_type.xml +11 -0
  324. data/tests/illformed/atom10/entry_source_subtitle_content_value.xml +11 -0
  325. data/tests/illformed/atom10/entry_source_subtitle_escaped_markup.xml +11 -0
  326. data/tests/illformed/atom10/entry_source_subtitle_inline_markup.xml +11 -0
  327. data/tests/illformed/atom10/entry_source_subtitle_inline_markup_2.xml +11 -0
  328. data/tests/illformed/atom10/entry_source_subtitle_text_plain.xml +11 -0
  329. data/tests/illformed/atom10/entry_source_title.xml +11 -0
  330. data/tests/illformed/atom10/entry_source_title_base64.xml +13 -0
  331. data/tests/illformed/atom10/entry_source_title_base64_2.xml +13 -0
  332. data/tests/illformed/atom10/entry_source_title_content_type.xml +11 -0
  333. data/tests/illformed/atom10/entry_source_title_content_type_text.xml +11 -0
  334. data/tests/illformed/atom10/entry_source_title_content_value.xml +11 -0
  335. data/tests/illformed/atom10/entry_source_title_escaped_markup.xml +11 -0
  336. data/tests/illformed/atom10/entry_source_title_inline_markup.xml +11 -0
  337. data/tests/illformed/atom10/entry_source_title_inline_markup_2.xml +11 -0
  338. data/tests/illformed/atom10/entry_source_title_text_plain.xml +11 -0
  339. data/tests/illformed/atom10/entry_summary.xml +9 -0
  340. data/tests/illformed/atom10/entry_summary_base64.xml +11 -0
  341. data/tests/illformed/atom10/entry_summary_base64_2.xml +11 -0
  342. data/tests/illformed/atom10/entry_summary_content_value.xml +9 -0
  343. data/tests/illformed/atom10/entry_summary_escaped_markup.xml +9 -0
  344. data/tests/illformed/atom10/entry_summary_inline_markup.xml +9 -0
  345. data/tests/illformed/atom10/entry_summary_inline_markup_2.xml +9 -0
  346. data/tests/illformed/atom10/entry_summary_text_plain.xml +9 -0
  347. data/tests/illformed/atom10/entry_summary_type_default.xml +9 -0
  348. data/tests/illformed/atom10/entry_summary_type_text.xml +9 -0
  349. data/tests/illformed/atom10/entry_title.xml +9 -0
  350. data/tests/illformed/atom10/entry_title_base64.xml +11 -0
  351. data/tests/illformed/atom10/entry_title_base64_2.xml +11 -0
  352. data/tests/illformed/atom10/entry_title_content_value.xml +9 -0
  353. data/tests/illformed/atom10/entry_title_escaped_markup.xml +9 -0
  354. data/tests/illformed/atom10/entry_title_inline_markup.xml +9 -0
  355. data/tests/illformed/atom10/entry_title_inline_markup_2.xml +9 -0
  356. data/tests/illformed/atom10/entry_title_text_plain.xml +9 -0
  357. data/tests/illformed/atom10/entry_title_text_plain_brackets.xml +9 -0
  358. data/tests/illformed/atom10/entry_title_type_default.xml +9 -0
  359. data/tests/illformed/atom10/entry_title_type_text.xml +9 -0
  360. data/tests/illformed/atom10/feed_author_email.xml +11 -0
  361. data/tests/illformed/atom10/feed_author_map_author.xml +11 -0
  362. data/tests/illformed/atom10/feed_author_map_author_2.xml +10 -0
  363. data/tests/illformed/atom10/feed_author_name.xml +11 -0
  364. data/tests/illformed/atom10/feed_author_uri.xml +11 -0
  365. data/tests/illformed/atom10/feed_author_url.xml +11 -0
  366. data/tests/illformed/atom10/feed_contributor_email.xml +11 -0
  367. data/tests/illformed/atom10/feed_contributor_multiple.xml +16 -0
  368. data/tests/illformed/atom10/feed_contributor_name.xml +11 -0
  369. data/tests/illformed/atom10/feed_contributor_uri.xml +11 -0
  370. data/tests/illformed/atom10/feed_contributor_url.xml +11 -0
  371. data/tests/illformed/atom10/feed_generator.xml +7 -0
  372. data/tests/illformed/atom10/feed_generator_name.xml +7 -0
  373. data/tests/illformed/atom10/feed_generator_url.xml +7 -0
  374. data/tests/illformed/atom10/feed_generator_version.xml +7 -0
  375. data/tests/illformed/atom10/feed_icon.xml +7 -0
  376. data/tests/illformed/atom10/feed_id.xml +7 -0
  377. data/tests/illformed/atom10/feed_id_map_guid.xml +7 -0
  378. data/tests/illformed/atom10/feed_link_alternate_map_link.xml +7 -0
  379. data/tests/illformed/atom10/feed_link_alternate_map_link_2.xml +7 -0
  380. data/tests/illformed/atom10/feed_link_href.xml +7 -0
  381. data/tests/illformed/atom10/feed_link_hreflang.xml +7 -0
  382. data/tests/illformed/atom10/feed_link_length.xml +7 -0
  383. data/tests/illformed/atom10/feed_link_multiple.xml +8 -0
  384. data/tests/illformed/atom10/feed_link_no_rel.xml +7 -0
  385. data/tests/illformed/atom10/feed_link_rel.xml +7 -0
  386. data/tests/illformed/atom10/feed_link_rel_other.xml +7 -0
  387. data/tests/illformed/atom10/feed_link_rel_related.xml +7 -0
  388. data/tests/illformed/atom10/feed_link_rel_self.xml +7 -0
  389. data/tests/illformed/atom10/feed_link_rel_via.xml +7 -0
  390. data/tests/illformed/atom10/feed_link_title.xml +7 -0
  391. data/tests/illformed/atom10/feed_link_type.xml +7 -0
  392. data/tests/illformed/atom10/feed_logo.xml +7 -0
  393. data/tests/illformed/atom10/feed_rights.xml +7 -0
  394. data/tests/illformed/atom10/feed_rights_base64.xml +9 -0
  395. data/tests/illformed/atom10/feed_rights_base64_2.xml +9 -0
  396. data/tests/illformed/atom10/feed_rights_content_type.xml +7 -0
  397. data/tests/illformed/atom10/feed_rights_content_type_text.xml +7 -0
  398. data/tests/illformed/atom10/feed_rights_content_value.xml +7 -0
  399. data/tests/illformed/atom10/feed_rights_escaped_markup.xml +7 -0
  400. data/tests/illformed/atom10/feed_rights_inline_markup.xml +7 -0
  401. data/tests/illformed/atom10/feed_rights_inline_markup_2.xml +7 -0
  402. data/tests/illformed/atom10/feed_rights_text_plain.xml +7 -0
  403. data/tests/illformed/atom10/feed_subtitle.xml +7 -0
  404. data/tests/illformed/atom10/feed_subtitle_base64.xml +9 -0
  405. data/tests/illformed/atom10/feed_subtitle_base64_2.xml +9 -0
  406. data/tests/illformed/atom10/feed_subtitle_content_type.xml +7 -0
  407. data/tests/illformed/atom10/feed_subtitle_content_type_text.xml +7 -0
  408. data/tests/illformed/atom10/feed_subtitle_content_value.xml +7 -0
  409. data/tests/illformed/atom10/feed_subtitle_escaped_markup.xml +7 -0
  410. data/tests/illformed/atom10/feed_subtitle_inline_markup.xml +7 -0
  411. data/tests/illformed/atom10/feed_subtitle_inline_markup_2.xml +7 -0
  412. data/tests/illformed/atom10/feed_subtitle_text_plain.xml +7 -0
  413. data/tests/illformed/atom10/feed_title.xml +7 -0
  414. data/tests/illformed/atom10/feed_title_base64.xml +9 -0
  415. data/tests/illformed/atom10/feed_title_base64_2.xml +9 -0
  416. data/tests/illformed/atom10/feed_title_content_type.xml +7 -0
  417. data/tests/illformed/atom10/feed_title_content_type_text.xml +7 -0
  418. data/tests/illformed/atom10/feed_title_content_value.xml +7 -0
  419. data/tests/illformed/atom10/feed_title_escaped_markup.xml +7 -0
  420. data/tests/illformed/atom10/feed_title_inline_markup.xml +7 -0
  421. data/tests/illformed/atom10/feed_title_inline_markup_2.xml +7 -0
  422. data/tests/illformed/atom10/feed_title_text_plain.xml +7 -0
  423. data/tests/illformed/atom10/relative_uri.xml +7 -0
  424. data/tests/illformed/atom10/relative_uri_inherit.xml +7 -0
  425. data/tests/illformed/atom10/relative_uri_inherit_2.xml +7 -0
  426. data/tests/illformed/base/cdf_item_abstract_xml_base.xml +18 -0
  427. data/tests/illformed/base/entry_content_xml_base.xml +9 -0
  428. data/tests/illformed/base/entry_content_xml_base_inherit.xml +9 -0
  429. data/tests/illformed/base/entry_content_xml_base_inherit_2.xml +9 -0
  430. data/tests/illformed/base/entry_content_xml_base_inherit_3.xml +10 -0
  431. data/tests/illformed/base/entry_content_xml_base_inherit_4.xml +10 -0
  432. data/tests/illformed/base/entry_summary_xml_base.xml +9 -0
  433. data/tests/illformed/base/entry_summary_xml_base_inherit.xml +9 -0
  434. data/tests/illformed/base/entry_summary_xml_base_inherit_2.xml +9 -0
  435. data/tests/illformed/base/entry_summary_xml_base_inherit_3.xml +10 -0
  436. data/tests/illformed/base/entry_summary_xml_base_inherit_4.xml +10 -0
  437. data/tests/illformed/base/entry_title_xml_base.xml +9 -0
  438. data/tests/illformed/base/entry_title_xml_base_inherit.xml +9 -0
  439. data/tests/illformed/base/entry_title_xml_base_inherit_2.xml +9 -0
  440. data/tests/illformed/base/entry_title_xml_base_inherit_3.xml +10 -0
  441. data/tests/illformed/base/entry_title_xml_base_inherit_4.xml +10 -0
  442. data/tests/illformed/base/feed_copyright_xml_base.xml +7 -0
  443. data/tests/illformed/base/feed_copyright_xml_base_inherit.xml +7 -0
  444. data/tests/illformed/base/feed_copyright_xml_base_inherit_2.xml +7 -0
  445. data/tests/illformed/base/feed_copyright_xml_base_inherit_3.xml +8 -0
  446. data/tests/illformed/base/feed_copyright_xml_base_inherit_4.xml +8 -0
  447. data/tests/illformed/base/feed_info_xml_base.xml +7 -0
  448. data/tests/illformed/base/feed_info_xml_base_inherit.xml +7 -0
  449. data/tests/illformed/base/feed_info_xml_base_inherit_2.xml +7 -0
  450. data/tests/illformed/base/feed_info_xml_base_inherit_3.xml +8 -0
  451. data/tests/illformed/base/feed_info_xml_base_inherit_4.xml +8 -0
  452. data/tests/illformed/base/feed_tagline_xml_base.xml +7 -0
  453. data/tests/illformed/base/feed_tagline_xml_base_inherit.xml +7 -0
  454. data/tests/illformed/base/feed_tagline_xml_base_inherit_2.xml +7 -0
  455. data/tests/illformed/base/feed_tagline_xml_base_inherit_3.xml +8 -0
  456. data/tests/illformed/base/feed_tagline_xml_base_inherit_4.xml +8 -0
  457. data/tests/illformed/base/feed_title_xml_base.xml +7 -0
  458. data/tests/illformed/base/feed_title_xml_base_inherit.xml +7 -0
  459. data/tests/illformed/base/feed_title_xml_base_inherit_2.xml +7 -0
  460. data/tests/illformed/base/feed_title_xml_base_inherit_3.xml +8 -0
  461. data/tests/illformed/base/feed_title_xml_base_inherit_4.xml +8 -0
  462. data/tests/illformed/base/http_channel_docs_base_content_location.xml +10 -0
  463. data/tests/illformed/base/http_channel_docs_base_docuri.xml +9 -0
  464. data/tests/illformed/base/http_channel_link_base_content_location.xml +10 -0
  465. data/tests/illformed/base/http_channel_link_base_docuri.xml +9 -0
  466. data/tests/illformed/base/http_entry_author_url_base_content_location.xml +12 -0
  467. data/tests/illformed/base/http_entry_author_url_base_docuri.xml +11 -0
  468. data/tests/illformed/base/http_entry_content_base64_base_content_location.xml +12 -0
  469. data/tests/illformed/base/http_entry_content_base64_base_docuri.xml +11 -0
  470. data/tests/illformed/base/http_entry_content_base_content_location.xml +10 -0
  471. data/tests/illformed/base/http_entry_content_base_docuri.xml +9 -0
  472. data/tests/illformed/base/http_entry_content_inline_base_content_location.xml +10 -0
  473. data/tests/illformed/base/http_entry_content_inline_base_docuri.xml +9 -0
  474. data/tests/illformed/base/http_entry_contributor_url_base_content_location.xml +12 -0
  475. data/tests/illformed/base/http_entry_contributor_url_base_docuri.xml +11 -0
  476. data/tests/illformed/base/http_entry_id_base_content_location.xml +10 -0
  477. data/tests/illformed/base/http_entry_id_base_docuri.xml +9 -0
  478. data/tests/illformed/base/http_entry_link_base_content_location.xml +10 -0
  479. data/tests/illformed/base/http_entry_link_base_docuri.xml +9 -0
  480. data/tests/illformed/base/http_entry_summary_base64_base_content_location.xml +12 -0
  481. data/tests/illformed/base/http_entry_summary_base64_base_docuri.xml +11 -0
  482. data/tests/illformed/base/http_entry_summary_base_content_location.xml +10 -0
  483. data/tests/illformed/base/http_entry_summary_base_docuri.xml +9 -0
  484. data/tests/illformed/base/http_entry_summary_inline_base_content_location.xml +10 -0
  485. data/tests/illformed/base/http_entry_summary_inline_base_docuri.xml +9 -0
  486. data/tests/illformed/base/http_entry_title_base64_base_content_location.xml +12 -0
  487. data/tests/illformed/base/http_entry_title_base64_base_docuri.xml +11 -0
  488. data/tests/illformed/base/http_entry_title_base_content_location.xml +10 -0
  489. data/tests/illformed/base/http_entry_title_base_docuri.xml +9 -0
  490. data/tests/illformed/base/http_entry_title_inline_base_content_location.xml +10 -0
  491. data/tests/illformed/base/http_entry_title_inline_base_docuri.xml +9 -0
  492. data/tests/illformed/base/http_feed_author_url_base_content_location.xml +10 -0
  493. data/tests/illformed/base/http_feed_author_url_base_docuri.xml +9 -0
  494. data/tests/illformed/base/http_feed_contributor_url_base_content_location.xml +10 -0
  495. data/tests/illformed/base/http_feed_contributor_url_base_docuri.xml +9 -0
  496. data/tests/illformed/base/http_feed_copyright_base64_base_content_location.xml +10 -0
  497. data/tests/illformed/base/http_feed_copyright_base64_base_docuri.xml +9 -0
  498. data/tests/illformed/base/http_feed_copyright_base_content_location.xml +8 -0
  499. data/tests/illformed/base/http_feed_copyright_base_docuri.xml +7 -0
  500. data/tests/illformed/base/http_feed_copyright_inline_base_content_location.xml +8 -0
  501. data/tests/illformed/base/http_feed_copyright_inline_base_docuri.xml +7 -0
  502. data/tests/illformed/base/http_feed_generator_url_base_content_location.xml +8 -0
  503. data/tests/illformed/base/http_feed_generator_url_base_docuri.xml +7 -0
  504. data/tests/illformed/base/http_feed_id_base_content_location.xml +8 -0
  505. data/tests/illformed/base/http_feed_id_base_docuri.xml +7 -0
  506. data/tests/illformed/base/http_feed_info_base64_base_content_location.xml +10 -0
  507. data/tests/illformed/base/http_feed_info_base64_base_docuri.xml +9 -0
  508. data/tests/illformed/base/http_feed_info_base_content_location.xml +8 -0
  509. data/tests/illformed/base/http_feed_info_base_docuri.xml +7 -0
  510. data/tests/illformed/base/http_feed_info_inline_base_content_location.xml +8 -0
  511. data/tests/illformed/base/http_feed_info_inline_base_docuri.xml +7 -0
  512. data/tests/illformed/base/http_feed_link_base_content_location.xml +8 -0
  513. data/tests/illformed/base/http_feed_link_base_docuri.xml +7 -0
  514. data/tests/illformed/base/http_feed_tagline_base64_base_content_location.xml +10 -0
  515. data/tests/illformed/base/http_feed_tagline_base64_base_docuri.xml +9 -0
  516. data/tests/illformed/base/http_feed_tagline_base_content_location.xml +8 -0
  517. data/tests/illformed/base/http_feed_tagline_base_docuri.xml +7 -0
  518. data/tests/illformed/base/http_feed_tagline_inline_base_content_location.xml +8 -0
  519. data/tests/illformed/base/http_feed_tagline_inline_base_docuri.xml +7 -0
  520. data/tests/illformed/base/http_feed_title_base64_base_content_location.xml +10 -0
  521. data/tests/illformed/base/http_feed_title_base64_base_docuri.xml +9 -0
  522. data/tests/illformed/base/http_feed_title_base_content_location.xml +8 -0
  523. data/tests/illformed/base/http_feed_title_base_docuri.xml +7 -0
  524. data/tests/illformed/base/http_feed_title_inline_base_content_location.xml +8 -0
  525. data/tests/illformed/base/http_feed_title_inline_base_docuri.xml +7 -0
  526. data/tests/illformed/base/http_item_body_base_content_location.xml +12 -0
  527. data/tests/illformed/base/http_item_body_base_docuri.xml +11 -0
  528. data/tests/illformed/base/http_item_comments_base_content_location.xml +12 -0
  529. data/tests/illformed/base/http_item_comments_base_docuri.xml +11 -0
  530. data/tests/illformed/base/http_item_content_encoded_base_content_location.xml +12 -0
  531. data/tests/illformed/base/http_item_content_encoded_base_docuri.xml +11 -0
  532. data/tests/illformed/base/http_item_description_base_content_location.xml +12 -0
  533. data/tests/illformed/base/http_item_description_base_docuri.xml +11 -0
  534. data/tests/illformed/base/http_item_fullitem_base_content_location.xml +12 -0
  535. data/tests/illformed/base/http_item_fullitem_base_docuri.xml +11 -0
  536. data/tests/illformed/base/http_item_link_base_content_location.xml +12 -0
  537. data/tests/illformed/base/http_item_link_base_docuri.xml +11 -0
  538. data/tests/illformed/base/http_item_wfw_commentRSS_base_content_location.xml +12 -0
  539. data/tests/illformed/base/http_item_wfw_commentRSS_base_docuri.xml +11 -0
  540. data/tests/illformed/base/http_item_wfw_comment_base_content_location.xml +12 -0
  541. data/tests/illformed/base/http_item_wfw_comment_base_docuri.xml +11 -0
  542. data/tests/illformed/base/http_item_xhtml_body_base_content_location.xml +12 -0
  543. data/tests/illformed/base/http_item_xhtml_body_base_docuri.xml +11 -0
  544. data/tests/illformed/base/http_relative_xml_base.xml +10 -0
  545. data/tests/illformed/base/malformed_base.xml +9 -0
  546. data/tests/illformed/base/relative_xml_base.xml +9 -0
  547. data/tests/illformed/base/relative_xml_base_2.xml +9 -0
  548. data/tests/illformed/cdf/channel_abstract_map_description.xml +7 -0
  549. data/tests/illformed/cdf/channel_abstract_map_tagline.xml +7 -0
  550. data/tests/illformed/cdf/channel_href_map_link.xml +6 -0
  551. data/tests/illformed/cdf/channel_href_map_links.xml +6 -0
  552. data/tests/illformed/cdf/channel_title.xml +7 -0
  553. data/tests/illformed/cdf/item_abstract_map_description.xml +9 -0
  554. data/tests/illformed/cdf/item_abstract_map_summary.xml +9 -0
  555. data/tests/illformed/cdf/item_href_map_link.xml +8 -0
  556. data/tests/illformed/cdf/item_href_map_links.xml +8 -0
  557. data/tests/illformed/cdf/item_title.xml +9 -0
  558. data/tests/illformed/chardet/big5.xml +8 -0
  559. data/tests/illformed/chardet/eucjp.xml +13 -0
  560. data/tests/illformed/chardet/euckr.xml +13 -0
  561. data/tests/illformed/chardet/gb2312.xml +12 -0
  562. data/tests/illformed/chardet/koi8r.xml +14 -0
  563. data/tests/illformed/chardet/shiftjis.xml +11 -0
  564. data/tests/illformed/chardet/tis620.xml +12 -0
  565. data/tests/illformed/chardet/windows1255.xml +14 -0
  566. data/tests/illformed/date/cdf_channel_lastmod_map_date.xml +6 -0
  567. data/tests/illformed/date/cdf_channel_lastmod_map_modified.xml +6 -0
  568. data/tests/illformed/date/cdf_channel_lastmod_map_modified_parsed.xml +6 -0
  569. data/tests/illformed/date/cdf_item_lastmod_map_date.xml +8 -0
  570. data/tests/illformed/date/cdf_item_lastmod_map_modified.xml +8 -0
  571. data/tests/illformed/date/cdf_item_lastmod_map_modified_parsed.xml +8 -0
  572. data/tests/illformed/date/channel_dc_date.xml +9 -0
  573. data/tests/illformed/date/channel_dc_date_map_modified.xml +9 -0
  574. data/tests/illformed/date/channel_dc_date_w3dtf_utc.xml +9 -0
  575. data/tests/illformed/date/channel_dc_date_w3dtf_utc_map_modified_parsed.xml +9 -0
  576. data/tests/illformed/date/channel_dcterms_created.xml +9 -0
  577. data/tests/illformed/date/channel_dcterms_created_w3dtf_utc.xml +9 -0
  578. data/tests/illformed/date/channel_dcterms_issued.xml +9 -0
  579. data/tests/illformed/date/channel_dcterms_issued_w3dtf_utc.xml +9 -0
  580. data/tests/illformed/date/channel_dcterms_modified.xml +9 -0
  581. data/tests/illformed/date/channel_dcterms_modified_map_date.xml +9 -0
  582. data/tests/illformed/date/channel_dcterms_modified_w3dtf_utc.xml +9 -0
  583. data/tests/illformed/date/channel_dcterms_modified_w3dtf_utc_map_date.xml +9 -0
  584. data/tests/illformed/date/channel_pubDate.xml +9 -0
  585. data/tests/illformed/date/channel_pubDate_asctime.xml +9 -0
  586. data/tests/illformed/date/channel_pubDate_disney.xml +9 -0
  587. data/tests/illformed/date/channel_pubDate_disney_at.xml +9 -0
  588. data/tests/illformed/date/channel_pubDate_disney_ct.xml +9 -0
  589. data/tests/illformed/date/channel_pubDate_disney_mt.xml +9 -0
  590. data/tests/illformed/date/channel_pubDate_disney_pt.xml +9 -0
  591. data/tests/illformed/date/channel_pubDate_greek_1.xml +9 -0
  592. data/tests/illformed/date/channel_pubDate_hungarian_1.xml +9 -0
  593. data/tests/illformed/date/channel_pubDate_iso8601_ym.xml +9 -0
  594. data/tests/illformed/date/channel_pubDate_iso8601_ym_2.xml +9 -0
  595. data/tests/illformed/date/channel_pubDate_iso8601_ymd.xml +9 -0
  596. data/tests/illformed/date/channel_pubDate_iso8601_ymd_2.xml +9 -0
  597. data/tests/illformed/date/channel_pubDate_iso8601_yo_2.xml +9 -0
  598. data/tests/illformed/date/channel_pubDate_korean_nate.xml +11 -0
  599. data/tests/illformed/date/channel_pubDate_map_modified.xml +9 -0
  600. data/tests/illformed/date/channel_pubDate_mssql.xml +9 -0
  601. data/tests/illformed/date/channel_pubDate_mssql_nofraction.xml +9 -0
  602. data/tests/illformed/date/channel_pubDate_nosecond.xml +9 -0
  603. data/tests/illformed/date/channel_pubDate_notime.xml +9 -0
  604. data/tests/illformed/date/channel_pubDate_rfc2822.xml +9 -0
  605. data/tests/illformed/date/channel_pubDate_rfc2822_rollover_june_31.xml +9 -0
  606. data/tests/illformed/date/channel_pubDate_rfc822.xml +9 -0
  607. data/tests/illformed/date/channel_pubDate_w3dtf_rollover_61m.xml +9 -0
  608. data/tests/illformed/date/channel_pubDate_w3dtf_rollover_61s.xml +9 -0
  609. data/tests/illformed/date/channel_pubDate_w3dtf_rollover_leapyear.xml +9 -0
  610. data/tests/illformed/date/channel_pubDate_w3dtf_rollover_leapyear400.xml +9 -0
  611. data/tests/illformed/date/channel_pubDate_w3dtf_rollover_nonleapyear.xml +9 -0
  612. data/tests/illformed/date/channel_pubDate_w3dtf_sf.xml +9 -0
  613. data/tests/illformed/date/channel_pubDate_w3dtf_tokyo.xml +9 -0
  614. data/tests/illformed/date/channel_pubDate_w3dtf_utc.xml +9 -0
  615. data/tests/illformed/date/channel_pubDate_w3dtf_y.xml +9 -0
  616. data/tests/illformed/date/channel_pubDate_w3dtf_ym.xml +9 -0
  617. data/tests/illformed/date/channel_pubDate_w3dtf_ymd.xml +9 -0
  618. data/tests/illformed/date/channel_pubDate_w3dtf_ymd_2.xml +9 -0
  619. data/tests/illformed/date/entry_created.xml +9 -0
  620. data/tests/illformed/date/entry_created_w3dtf_utc.xml +9 -0
  621. data/tests/illformed/date/entry_issued.xml +9 -0
  622. data/tests/illformed/date/entry_issued_w3dtf_utc.xml +9 -0
  623. data/tests/illformed/date/entry_modified.xml +9 -0
  624. data/tests/illformed/date/entry_modified_map_date.xml +9 -0
  625. data/tests/illformed/date/entry_modified_w3dtf_utc.xml +9 -0
  626. data/tests/illformed/date/entry_published_w3dtf_utc.xml +9 -0
  627. data/tests/illformed/date/entry_source_updated_w3dtf_utc.xml +11 -0
  628. data/tests/illformed/date/entry_updated_w3dtf_utc.xml +9 -0
  629. data/tests/illformed/date/feed_modified.xml +9 -0
  630. data/tests/illformed/date/feed_modified_asctime.xml +9 -0
  631. data/tests/illformed/date/feed_modified_disney.xml +7 -0
  632. data/tests/illformed/date/feed_modified_disney_at.xml +7 -0
  633. data/tests/illformed/date/feed_modified_disney_ct.xml +7 -0
  634. data/tests/illformed/date/feed_modified_disney_mt.xml +7 -0
  635. data/tests/illformed/date/feed_modified_disney_pt.xml +7 -0
  636. data/tests/illformed/date/feed_modified_iso8601_ym.xml +9 -0
  637. data/tests/illformed/date/feed_modified_iso8601_ym_2.xml +9 -0
  638. data/tests/illformed/date/feed_modified_iso8601_ymd.xml +9 -0
  639. data/tests/illformed/date/feed_modified_iso8601_ymd_2.xml +9 -0
  640. data/tests/illformed/date/feed_modified_iso8601_yo_2.xml +9 -0
  641. data/tests/illformed/date/feed_modified_map_date.xml +9 -0
  642. data/tests/illformed/date/feed_modified_rfc2822.xml +9 -0
  643. data/tests/illformed/date/feed_modified_rfc2822_rollover_june_31.xml +9 -0
  644. data/tests/illformed/date/feed_modified_rfc822.xml +9 -0
  645. data/tests/illformed/date/feed_modified_w3dtf_rollover_leapyear.xml +9 -0
  646. data/tests/illformed/date/feed_modified_w3dtf_rollover_leapyear400.xml +9 -0
  647. data/tests/illformed/date/feed_modified_w3dtf_rollover_nonleapyear.xml +9 -0
  648. data/tests/illformed/date/feed_modified_w3dtf_sf.xml +9 -0
  649. data/tests/illformed/date/feed_modified_w3dtf_tokyo.xml +9 -0
  650. data/tests/illformed/date/feed_modified_w3dtf_utc.xml +9 -0
  651. data/tests/illformed/date/feed_modified_w3dtf_y.xml +9 -0
  652. data/tests/illformed/date/feed_modified_w3dtf_ym.xml +9 -0
  653. data/tests/illformed/date/feed_modified_w3dtf_ymd.xml +9 -0
  654. data/tests/illformed/date/feed_modified_w3dtf_ymd_2.xml +9 -0
  655. data/tests/illformed/date/feed_updated_w3dtf_utc.xml +7 -0
  656. data/tests/illformed/date/http_high_bit_date.xml +12 -0
  657. data/tests/illformed/date/item_dc_date.xml +11 -0
  658. data/tests/illformed/date/item_dc_date_map_modified.xml +11 -0
  659. data/tests/illformed/date/item_dc_date_w3dtf_utc.xml +11 -0
  660. data/tests/illformed/date/item_dc_date_w3dtf_utc_map_modified_parsed.xml +11 -0
  661. data/tests/illformed/date/item_dcterms_created.xml +11 -0
  662. data/tests/illformed/date/item_dcterms_created_w3dtf_utc.xml +11 -0
  663. data/tests/illformed/date/item_dcterms_issued.xml +11 -0
  664. data/tests/illformed/date/item_dcterms_issued_w3dtf_utc.xml +11 -0
  665. data/tests/illformed/date/item_dcterms_modified.xml +11 -0
  666. data/tests/illformed/date/item_dcterms_modified_map_date.xml +11 -0
  667. data/tests/illformed/date/item_dcterms_modified_w3dtf_utc.xml +11 -0
  668. data/tests/illformed/date/item_dcterms_modified_w3dtf_utc_map_date.xml +11 -0
  669. data/tests/illformed/date/item_expirationDate.xml +11 -0
  670. data/tests/illformed/date/item_expirationDate_rfc2822.xml +11 -0
  671. data/tests/illformed/date/item_pubDate.xml +11 -0
  672. data/tests/illformed/date/item_pubDate_euc-kr.xml +13 -0
  673. data/tests/illformed/date/item_pubDate_map_modified.xml +11 -0
  674. data/tests/illformed/date/item_pubDate_rfc2822.xml +11 -0
  675. data/tests/illformed/encoding/bogus_encoding.xml +7 -0
  676. data/tests/illformed/encoding/encoding_mismatch_crash.xml +10 -0
  677. data/tests/illformed/encoding/http_i18n.xml +13 -0
  678. data/tests/illformed/encoding/http_text_plain.xml +8 -0
  679. data/tests/illformed/encoding/http_text_plain_charset.xml +8 -0
  680. data/tests/illformed/encoding/utf-16be-autodetect.xml +0 -0
  681. data/tests/illformed/encoding/utf-16be-bom.xml +0 -0
  682. data/tests/illformed/encoding/utf-16be.xml +0 -0
  683. data/tests/illformed/encoding/utf-16le-autodetect.xml +0 -0
  684. data/tests/illformed/encoding/utf-16le-bom.xml +0 -0
  685. data/tests/illformed/encoding/utf-16le.xml +0 -0
  686. data/tests/illformed/encoding/utf-32be-autodetect.xml +0 -0
  687. data/tests/illformed/encoding/utf-32be-bom.xml +0 -0
  688. data/tests/illformed/encoding/utf-32be.xml +0 -0
  689. data/tests/illformed/encoding/utf-32le-autodetect.xml +0 -0
  690. data/tests/illformed/encoding/utf-32le-bom.xml +0 -0
  691. data/tests/illformed/encoding/utf-32le.xml +0 -0
  692. data/tests/illformed/encoding/utf-8-bom.xml +8 -0
  693. data/tests/illformed/encoding/x80_437.xml +9 -0
  694. data/tests/illformed/encoding/x80_850.xml +9 -0
  695. data/tests/illformed/encoding/x80_852.xml +9 -0
  696. data/tests/illformed/encoding/x80_855.xml +9 -0
  697. data/tests/illformed/encoding/x80_857.xml +9 -0
  698. data/tests/illformed/encoding/x80_860.xml +9 -0
  699. data/tests/illformed/encoding/x80_861.xml +9 -0
  700. data/tests/illformed/encoding/x80_862.xml +9 -0
  701. data/tests/illformed/encoding/x80_863.xml +9 -0
  702. data/tests/illformed/encoding/x80_865.xml +9 -0
  703. data/tests/illformed/encoding/x80_866.xml +9 -0
  704. data/tests/illformed/encoding/x80_cp037.xml +1 -0
  705. data/tests/illformed/encoding/x80_cp1125.xml +9 -0
  706. data/tests/illformed/encoding/x80_cp1250.xml +9 -0
  707. data/tests/illformed/encoding/x80_cp1251.xml +9 -0
  708. data/tests/illformed/encoding/x80_cp1252.xml +9 -0
  709. data/tests/illformed/encoding/x80_cp1253.xml +9 -0
  710. data/tests/illformed/encoding/x80_cp1254.xml +9 -0
  711. data/tests/illformed/encoding/x80_cp1255.xml +9 -0
  712. data/tests/illformed/encoding/x80_cp1256.xml +9 -0
  713. data/tests/illformed/encoding/x80_cp1257.xml +9 -0
  714. data/tests/illformed/encoding/x80_cp1258.xml +9 -0
  715. data/tests/illformed/encoding/x80_cp437.xml +9 -0
  716. data/tests/illformed/encoding/x80_cp500.xml +1 -0
  717. data/tests/illformed/encoding/x80_cp737.xml +9 -0
  718. data/tests/illformed/encoding/x80_cp775.xml +9 -0
  719. data/tests/illformed/encoding/x80_cp850.xml +9 -0
  720. data/tests/illformed/encoding/x80_cp852.xml +9 -0
  721. data/tests/illformed/encoding/x80_cp855.xml +9 -0
  722. data/tests/illformed/encoding/x80_cp856.xml +9 -0
  723. data/tests/illformed/encoding/x80_cp857.xml +9 -0
  724. data/tests/illformed/encoding/x80_cp860.xml +9 -0
  725. data/tests/illformed/encoding/x80_cp861.xml +9 -0
  726. data/tests/illformed/encoding/x80_cp862.xml +9 -0
  727. data/tests/illformed/encoding/x80_cp863.xml +9 -0
  728. data/tests/illformed/encoding/x80_cp864.xml +9 -0
  729. data/tests/illformed/encoding/x80_cp865.xml +9 -0
  730. data/tests/illformed/encoding/x80_cp866.xml +9 -0
  731. data/tests/illformed/encoding/x80_cp874.xml +9 -0
  732. data/tests/illformed/encoding/x80_cp875.xml +1 -0
  733. data/tests/illformed/encoding/x80_cp_is.xml +9 -0
  734. data/tests/illformed/encoding/x80_csibm037.xml +1 -0
  735. data/tests/illformed/encoding/x80_csibm500.xml +1 -0
  736. data/tests/illformed/encoding/x80_csibm855.xml +9 -0
  737. data/tests/illformed/encoding/x80_csibm857.xml +9 -0
  738. data/tests/illformed/encoding/x80_csibm860.xml +9 -0
  739. data/tests/illformed/encoding/x80_csibm861.xml +9 -0
  740. data/tests/illformed/encoding/x80_csibm863.xml +9 -0
  741. data/tests/illformed/encoding/x80_csibm864.xml +9 -0
  742. data/tests/illformed/encoding/x80_csibm865.xml +9 -0
  743. data/tests/illformed/encoding/x80_csibm866.xml +9 -0
  744. data/tests/illformed/encoding/x80_cskoi8r.xml +9 -0
  745. data/tests/illformed/encoding/x80_csmacintosh.xml +9 -0
  746. data/tests/illformed/encoding/x80_cspc775baltic.xml +9 -0
  747. data/tests/illformed/encoding/x80_cspc850multilingual.xml +9 -0
  748. data/tests/illformed/encoding/x80_cspc862latinhebrew.xml +9 -0
  749. data/tests/illformed/encoding/x80_cspc8codepage437.xml +9 -0
  750. data/tests/illformed/encoding/x80_cspcp852.xml +9 -0
  751. data/tests/illformed/encoding/x80_dbcs.xml +9 -0
  752. data/tests/illformed/encoding/x80_ebcdic-cp-be.xml +1 -0
  753. data/tests/illformed/encoding/x80_ebcdic-cp-ca.xml +1 -0
  754. data/tests/illformed/encoding/x80_ebcdic-cp-ch.xml +1 -0
  755. data/tests/illformed/encoding/x80_ebcdic-cp-nl.xml +1 -0
  756. data/tests/illformed/encoding/x80_ebcdic-cp-us.xml +1 -0
  757. data/tests/illformed/encoding/x80_ebcdic-cp-wt.xml +1 -0
  758. data/tests/illformed/encoding/x80_ebcdic_cp_be.xml +1 -0
  759. data/tests/illformed/encoding/x80_ebcdic_cp_ca.xml +1 -0
  760. data/tests/illformed/encoding/x80_ebcdic_cp_ch.xml +1 -0
  761. data/tests/illformed/encoding/x80_ebcdic_cp_nl.xml +1 -0
  762. data/tests/illformed/encoding/x80_ebcdic_cp_us.xml +1 -0
  763. data/tests/illformed/encoding/x80_ebcdic_cp_wt.xml +1 -0
  764. data/tests/illformed/encoding/x80_ibm037.xml +1 -0
  765. data/tests/illformed/encoding/x80_ibm039.xml +1 -0
  766. data/tests/illformed/encoding/x80_ibm1140.xml +1 -0
  767. data/tests/illformed/encoding/x80_ibm437.xml +9 -0
  768. data/tests/illformed/encoding/x80_ibm500.xml +1 -0
  769. data/tests/illformed/encoding/x80_ibm775.xml +9 -0
  770. data/tests/illformed/encoding/x80_ibm850.xml +9 -0
  771. data/tests/illformed/encoding/x80_ibm852.xml +9 -0
  772. data/tests/illformed/encoding/x80_ibm855.xml +9 -0
  773. data/tests/illformed/encoding/x80_ibm857.xml +9 -0
  774. data/tests/illformed/encoding/x80_ibm860.xml +9 -0
  775. data/tests/illformed/encoding/x80_ibm861.xml +9 -0
  776. data/tests/illformed/encoding/x80_ibm862.xml +9 -0
  777. data/tests/illformed/encoding/x80_ibm863.xml +9 -0
  778. data/tests/illformed/encoding/x80_ibm864.xml +9 -0
  779. data/tests/illformed/encoding/x80_ibm865.xml +9 -0
  780. data/tests/illformed/encoding/x80_ibm866.xml +9 -0
  781. data/tests/illformed/encoding/x80_koi8-r.xml +9 -0
  782. data/tests/illformed/encoding/x80_koi8-t.xml +9 -0
  783. data/tests/illformed/encoding/x80_koi8-u.xml +9 -0
  784. data/tests/illformed/encoding/x80_mac-cyrillic.xml +9 -0
  785. data/tests/illformed/encoding/x80_mac.xml +9 -0
  786. data/tests/illformed/encoding/x80_maccentraleurope.xml +9 -0
  787. data/tests/illformed/encoding/x80_maccyrillic.xml +9 -0
  788. data/tests/illformed/encoding/x80_macgreek.xml +9 -0
  789. data/tests/illformed/encoding/x80_maciceland.xml +9 -0
  790. data/tests/illformed/encoding/x80_macintosh.xml +9 -0
  791. data/tests/illformed/encoding/x80_maclatin2.xml +9 -0
  792. data/tests/illformed/encoding/x80_macroman.xml +9 -0
  793. data/tests/illformed/encoding/x80_macturkish.xml +9 -0
  794. data/tests/illformed/encoding/x80_ms-ansi.xml +9 -0
  795. data/tests/illformed/encoding/x80_ms-arab.xml +9 -0
  796. data/tests/illformed/encoding/x80_ms-cyrl.xml +9 -0
  797. data/tests/illformed/encoding/x80_ms-ee.xml +9 -0
  798. data/tests/illformed/encoding/x80_ms-greek.xml +9 -0
  799. data/tests/illformed/encoding/x80_ms-hebr.xml +9 -0
  800. data/tests/illformed/encoding/x80_ms-turk.xml +9 -0
  801. data/tests/illformed/encoding/x80_tcvn-5712.xml +9 -0
  802. data/tests/illformed/encoding/x80_tcvn.xml +9 -0
  803. data/tests/illformed/encoding/x80_tcvn5712-1.xml +9 -0
  804. data/tests/illformed/encoding/x80_viscii.xml +9 -0
  805. data/tests/illformed/encoding/x80_winbaltrim.xml +9 -0
  806. data/tests/illformed/encoding/x80_windows-1250.xml +9 -0
  807. data/tests/illformed/encoding/x80_windows-1251.xml +9 -0
  808. data/tests/illformed/encoding/x80_windows-1252.xml +9 -0
  809. data/tests/illformed/encoding/x80_windows-1253.xml +9 -0
  810. data/tests/illformed/encoding/x80_windows-1254.xml +9 -0
  811. data/tests/illformed/encoding/x80_windows-1255.xml +9 -0
  812. data/tests/illformed/encoding/x80_windows-1256.xml +9 -0
  813. data/tests/illformed/encoding/x80_windows-1257.xml +9 -0
  814. data/tests/illformed/encoding/x80_windows-1258.xml +9 -0
  815. data/tests/illformed/encoding/x80_windows_1250.xml +9 -0
  816. data/tests/illformed/encoding/x80_windows_1251.xml +9 -0
  817. data/tests/illformed/encoding/x80_windows_1252.xml +9 -0
  818. data/tests/illformed/encoding/x80_windows_1253.xml +9 -0
  819. data/tests/illformed/encoding/x80_windows_1254.xml +9 -0
  820. data/tests/illformed/encoding/x80_windows_1255.xml +9 -0
  821. data/tests/illformed/encoding/x80_windows_1256.xml +9 -0
  822. data/tests/illformed/encoding/x80_windows_1257.xml +9 -0
  823. data/tests/illformed/encoding/x80_windows_1258.xml +9 -0
  824. data/tests/illformed/entities/160.xml +9 -0
  825. data/tests/illformed/entities/732.xml +9 -0
  826. data/tests/illformed/entities/8216.xml +9 -0
  827. data/tests/illformed/entities/8217.xml +9 -0
  828. data/tests/illformed/entities/8220.xml +9 -0
  829. data/tests/illformed/entities/8221.xml +9 -0
  830. data/tests/illformed/entities/9830.xml +9 -0
  831. data/tests/illformed/entities/aacute.xml +9 -0
  832. data/tests/illformed/entities/acirc.xml +9 -0
  833. data/tests/illformed/entities/acute.xml +9 -0
  834. data/tests/illformed/entities/aelig.xml +9 -0
  835. data/tests/illformed/entities/agrave.xml +9 -0
  836. data/tests/illformed/entities/alefsym.xml +9 -0
  837. data/tests/illformed/entities/alpha.xml +9 -0
  838. data/tests/illformed/entities/and.xml +9 -0
  839. data/tests/illformed/entities/ang.xml +9 -0
  840. data/tests/illformed/entities/aring.xml +9 -0
  841. data/tests/illformed/entities/asymp.xml +9 -0
  842. data/tests/illformed/entities/atilde.xml +9 -0
  843. data/tests/illformed/entities/auml.xml +9 -0
  844. data/tests/illformed/entities/bdquo.xml +9 -0
  845. data/tests/illformed/entities/beta.xml +9 -0
  846. data/tests/illformed/entities/brvbar.xml +9 -0
  847. data/tests/illformed/entities/bull.xml +9 -0
  848. data/tests/illformed/entities/cap.xml +9 -0
  849. data/tests/illformed/entities/ccedil.xml +9 -0
  850. data/tests/illformed/entities/cedil.xml +9 -0
  851. data/tests/illformed/entities/cent.xml +9 -0
  852. data/tests/illformed/entities/chi.xml +9 -0
  853. data/tests/illformed/entities/circ.xml +9 -0
  854. data/tests/illformed/entities/clubs.xml +9 -0
  855. data/tests/illformed/entities/cong.xml +9 -0
  856. data/tests/illformed/entities/copy.xml +9 -0
  857. data/tests/illformed/entities/crarr.xml +9 -0
  858. data/tests/illformed/entities/cup.xml +9 -0
  859. data/tests/illformed/entities/curren.xml +9 -0
  860. data/tests/illformed/entities/dagger.xml +9 -0
  861. data/tests/illformed/entities/darr.xml +9 -0
  862. data/tests/illformed/entities/deg.xml +9 -0
  863. data/tests/illformed/entities/delta.xml +9 -0
  864. data/tests/illformed/entities/diams.xml +9 -0
  865. data/tests/illformed/entities/divide.xml +9 -0
  866. data/tests/illformed/entities/doesnotexist.xml +9 -0
  867. data/tests/illformed/entities/eacute.xml +9 -0
  868. data/tests/illformed/entities/ecirc.xml +9 -0
  869. data/tests/illformed/entities/egrave.xml +9 -0
  870. data/tests/illformed/entities/empty.xml +9 -0
  871. data/tests/illformed/entities/emsp.xml +9 -0
  872. data/tests/illformed/entities/ensp.xml +9 -0
  873. data/tests/illformed/entities/epsilon.xml +9 -0
  874. data/tests/illformed/entities/equiv.xml +9 -0
  875. data/tests/illformed/entities/eta.xml +9 -0
  876. data/tests/illformed/entities/eth.xml +9 -0
  877. data/tests/illformed/entities/euml.xml +9 -0
  878. data/tests/illformed/entities/euro.xml +9 -0
  879. data/tests/illformed/entities/exist.xml +9 -0
  880. data/tests/illformed/entities/fnof.xml +9 -0
  881. data/tests/illformed/entities/forall.xml +9 -0
  882. data/tests/illformed/entities/frac12.xml +9 -0
  883. data/tests/illformed/entities/frac14.xml +9 -0
  884. data/tests/illformed/entities/frac34.xml +9 -0
  885. data/tests/illformed/entities/frasl.xml +9 -0
  886. data/tests/illformed/entities/gamma.xml +9 -0
  887. data/tests/illformed/entities/ge.xml +9 -0
  888. data/tests/illformed/entities/hArr.xml +9 -0
  889. data/tests/illformed/entities/hearts.xml +9 -0
  890. data/tests/illformed/entities/hellip.xml +9 -0
  891. data/tests/illformed/entities/iacute.xml +9 -0
  892. data/tests/illformed/entities/icirc.xml +9 -0
  893. data/tests/illformed/entities/iexcl.xml +9 -0
  894. data/tests/illformed/entities/igrave.xml +9 -0
  895. data/tests/illformed/entities/image.xml +9 -0
  896. data/tests/illformed/entities/infin.xml +9 -0
  897. data/tests/illformed/entities/int.xml +9 -0
  898. data/tests/illformed/entities/iota.xml +9 -0
  899. data/tests/illformed/entities/iquest.xml +9 -0
  900. data/tests/illformed/entities/isin.xml +9 -0
  901. data/tests/illformed/entities/iuml.xml +9 -0
  902. data/tests/illformed/entities/kappa.xml +9 -0
  903. data/tests/illformed/entities/lArr.xml +9 -0
  904. data/tests/illformed/entities/lambda.xml +9 -0
  905. data/tests/illformed/entities/lang.xml +9 -0
  906. data/tests/illformed/entities/laquo.xml +9 -0
  907. data/tests/illformed/entities/lceil.xml +9 -0
  908. data/tests/illformed/entities/ldquo.xml +9 -0
  909. data/tests/illformed/entities/le.xml +9 -0
  910. data/tests/illformed/entities/lfloor.xml +9 -0
  911. data/tests/illformed/entities/lowast.xml +9 -0
  912. data/tests/illformed/entities/loz.xml +9 -0
  913. data/tests/illformed/entities/lrm.xml +9 -0
  914. data/tests/illformed/entities/lsaquo.xml +9 -0
  915. data/tests/illformed/entities/lsquo.xml +9 -0
  916. data/tests/illformed/entities/macr.xml +9 -0
  917. data/tests/illformed/entities/mdash.xml +9 -0
  918. data/tests/illformed/entities/micro.xml +9 -0
  919. data/tests/illformed/entities/middot.xml +9 -0
  920. data/tests/illformed/entities/minus.xml +9 -0
  921. data/tests/illformed/entities/mu.xml +9 -0
  922. data/tests/illformed/entities/nabla.xml +9 -0
  923. data/tests/illformed/entities/nbsp.xml +9 -0
  924. data/tests/illformed/entities/ndash.xml +9 -0
  925. data/tests/illformed/entities/ne.xml +9 -0
  926. data/tests/illformed/entities/ni.xml +9 -0
  927. data/tests/illformed/entities/not.xml +9 -0
  928. data/tests/illformed/entities/notin.xml +9 -0
  929. data/tests/illformed/entities/nsub.xml +9 -0
  930. data/tests/illformed/entities/ntilde.xml +9 -0
  931. data/tests/illformed/entities/nu.xml +9 -0
  932. data/tests/illformed/entities/oacute.xml +9 -0
  933. data/tests/illformed/entities/ocirc.xml +9 -0
  934. data/tests/illformed/entities/oelig.xml +9 -0
  935. data/tests/illformed/entities/ograve.xml +9 -0
  936. data/tests/illformed/entities/oline.xml +9 -0
  937. data/tests/illformed/entities/omega.xml +9 -0
  938. data/tests/illformed/entities/omicron.xml +9 -0
  939. data/tests/illformed/entities/oplus.xml +9 -0
  940. data/tests/illformed/entities/or.xml +9 -0
  941. data/tests/illformed/entities/ordf.xml +9 -0
  942. data/tests/illformed/entities/ordm.xml +9 -0
  943. data/tests/illformed/entities/oslash.xml +9 -0
  944. data/tests/illformed/entities/otilde.xml +9 -0
  945. data/tests/illformed/entities/otimes.xml +9 -0
  946. data/tests/illformed/entities/ouml.xml +9 -0
  947. data/tests/illformed/entities/para.xml +9 -0
  948. data/tests/illformed/entities/part.xml +9 -0
  949. data/tests/illformed/entities/permil.xml +9 -0
  950. data/tests/illformed/entities/perp.xml +9 -0
  951. data/tests/illformed/entities/phi.xml +9 -0
  952. data/tests/illformed/entities/pi.xml +9 -0
  953. data/tests/illformed/entities/piv.xml +9 -0
  954. data/tests/illformed/entities/plusmn.xml +9 -0
  955. data/tests/illformed/entities/pound.xml +9 -0
  956. data/tests/illformed/entities/prime.xml +9 -0
  957. data/tests/illformed/entities/prod.xml +9 -0
  958. data/tests/illformed/entities/prop.xml +9 -0
  959. data/tests/illformed/entities/psi.xml +9 -0
  960. data/tests/illformed/entities/radic.xml +9 -0
  961. data/tests/illformed/entities/rang.xml +9 -0
  962. data/tests/illformed/entities/raquo.xml +9 -0
  963. data/tests/illformed/entities/rarr.xml +9 -0
  964. data/tests/illformed/entities/rceil.xml +9 -0
  965. data/tests/illformed/entities/rdquo.xml +9 -0
  966. data/tests/illformed/entities/real.xml +9 -0
  967. data/tests/illformed/entities/reg.xml +9 -0
  968. data/tests/illformed/entities/rfloor.xml +9 -0
  969. data/tests/illformed/entities/rho.xml +9 -0
  970. data/tests/illformed/entities/rlm.xml +9 -0
  971. data/tests/illformed/entities/rsaquo.xml +9 -0
  972. data/tests/illformed/entities/rsquo.xml +9 -0
  973. data/tests/illformed/entities/sbquo.xml +9 -0
  974. data/tests/illformed/entities/scaron.xml +9 -0
  975. data/tests/illformed/entities/sdot.xml +9 -0
  976. data/tests/illformed/entities/sect.xml +9 -0
  977. data/tests/illformed/entities/shy.xml +9 -0
  978. data/tests/illformed/entities/sigma.xml +9 -0
  979. data/tests/illformed/entities/sigmaf.xml +9 -0
  980. data/tests/illformed/entities/sim.xml +9 -0
  981. data/tests/illformed/entities/spades.xml +9 -0
  982. data/tests/illformed/entities/sub.xml +9 -0
  983. data/tests/illformed/entities/sube.xml +9 -0
  984. data/tests/illformed/entities/sum.xml +9 -0
  985. data/tests/illformed/entities/sup.xml +9 -0
  986. data/tests/illformed/entities/sup1.xml +9 -0
  987. data/tests/illformed/entities/sup2.xml +9 -0
  988. data/tests/illformed/entities/sup3.xml +9 -0
  989. data/tests/illformed/entities/supe.xml +9 -0
  990. data/tests/illformed/entities/szlig.xml +9 -0
  991. data/tests/illformed/entities/tau.xml +9 -0
  992. data/tests/illformed/entities/there4.xml +9 -0
  993. data/tests/illformed/entities/theta.xml +9 -0
  994. data/tests/illformed/entities/thetasym.xml +9 -0
  995. data/tests/illformed/entities/thinsp.xml +9 -0
  996. data/tests/illformed/entities/thorn.xml +9 -0
  997. data/tests/illformed/entities/tilde.xml +9 -0
  998. data/tests/illformed/entities/times.xml +9 -0
  999. data/tests/illformed/entities/trade.xml +9 -0
  1000. data/tests/illformed/entities/uacute.xml +9 -0
  1001. data/tests/illformed/entities/uarr.xml +9 -0
  1002. data/tests/illformed/entities/ucirc.xml +9 -0
  1003. data/tests/illformed/entities/ugrave.xml +9 -0
  1004. data/tests/illformed/entities/uml.xml +9 -0
  1005. data/tests/illformed/entities/upper_AElig.xml +9 -0
  1006. data/tests/illformed/entities/upper_Aacute.xml +9 -0
  1007. data/tests/illformed/entities/upper_Acirc.xml +9 -0
  1008. data/tests/illformed/entities/upper_Agrave.xml +9 -0
  1009. data/tests/illformed/entities/upper_Alpha.xml +9 -0
  1010. data/tests/illformed/entities/upper_Aring.xml +9 -0
  1011. data/tests/illformed/entities/upper_Atilde.xml +9 -0
  1012. data/tests/illformed/entities/upper_Auml.xml +9 -0
  1013. data/tests/illformed/entities/upper_Beta.xml +9 -0
  1014. data/tests/illformed/entities/upper_Ccedil.xml +9 -0
  1015. data/tests/illformed/entities/upper_Chi.xml +9 -0
  1016. data/tests/illformed/entities/upper_Dagger.xml +9 -0
  1017. data/tests/illformed/entities/upper_Delta.xml +9 -0
  1018. data/tests/illformed/entities/upper_ETH.xml +9 -0
  1019. data/tests/illformed/entities/upper_Eacute.xml +9 -0
  1020. data/tests/illformed/entities/upper_Ecirc.xml +9 -0
  1021. data/tests/illformed/entities/upper_Egrave.xml +9 -0
  1022. data/tests/illformed/entities/upper_Epsilon.xml +9 -0
  1023. data/tests/illformed/entities/upper_Eta.xml +9 -0
  1024. data/tests/illformed/entities/upper_Euml.xml +9 -0
  1025. data/tests/illformed/entities/upper_Gamma.xml +9 -0
  1026. data/tests/illformed/entities/upper_Iacute.xml +9 -0
  1027. data/tests/illformed/entities/upper_Icirc.xml +9 -0
  1028. data/tests/illformed/entities/upper_Igrave.xml +9 -0
  1029. data/tests/illformed/entities/upper_Iota.xml +9 -0
  1030. data/tests/illformed/entities/upper_Iuml.xml +9 -0
  1031. data/tests/illformed/entities/upper_Kappa.xml +9 -0
  1032. data/tests/illformed/entities/upper_Lambda.xml +9 -0
  1033. data/tests/illformed/entities/upper_Mu.xml +9 -0
  1034. data/tests/illformed/entities/upper_Ntilde.xml +9 -0
  1035. data/tests/illformed/entities/upper_Nu.xml +9 -0
  1036. data/tests/illformed/entities/upper_OElig.xml +9 -0
  1037. data/tests/illformed/entities/upper_Oacute.xml +9 -0
  1038. data/tests/illformed/entities/upper_Ocirc.xml +9 -0
  1039. data/tests/illformed/entities/upper_Ograve.xml +9 -0
  1040. data/tests/illformed/entities/upper_Omega.xml +9 -0
  1041. data/tests/illformed/entities/upper_Omicron.xml +9 -0
  1042. data/tests/illformed/entities/upper_Oslash.xml +9 -0
  1043. data/tests/illformed/entities/upper_Otilde.xml +9 -0
  1044. data/tests/illformed/entities/upper_Ouml.xml +9 -0
  1045. data/tests/illformed/entities/upper_Phi.xml +9 -0
  1046. data/tests/illformed/entities/upper_Pi.xml +9 -0
  1047. data/tests/illformed/entities/upper_Prime.xml +9 -0
  1048. data/tests/illformed/entities/upper_Psi.xml +9 -0
  1049. data/tests/illformed/entities/upper_Rho.xml +9 -0
  1050. data/tests/illformed/entities/upper_Scaron.xml +9 -0
  1051. data/tests/illformed/entities/upper_Sigma.xml +9 -0
  1052. data/tests/illformed/entities/upper_THORN.xml +9 -0
  1053. data/tests/illformed/entities/upper_Tau.xml +9 -0
  1054. data/tests/illformed/entities/upper_Theta.xml +9 -0
  1055. data/tests/illformed/entities/upper_Uacute.xml +9 -0
  1056. data/tests/illformed/entities/upper_Ucirc.xml +9 -0
  1057. data/tests/illformed/entities/upper_Ugrave.xml +9 -0
  1058. data/tests/illformed/entities/upper_Upsilon.xml +9 -0
  1059. data/tests/illformed/entities/upper_Uuml.xml +9 -0
  1060. data/tests/illformed/entities/upper_Xi.xml +9 -0
  1061. data/tests/illformed/entities/upper_Yacute.xml +9 -0
  1062. data/tests/illformed/entities/upper_Yuml.xml +9 -0
  1063. data/tests/illformed/entities/upper_Zeta.xml +9 -0
  1064. data/tests/illformed/entities/upsih.xml +9 -0
  1065. data/tests/illformed/entities/upsilon.xml +9 -0
  1066. data/tests/illformed/entities/uuml.xml +9 -0
  1067. data/tests/illformed/entities/weierp.xml +9 -0
  1068. data/tests/illformed/entities/xi.xml +9 -0
  1069. data/tests/illformed/entities/yacute.xml +9 -0
  1070. data/tests/illformed/entities/yen.xml +9 -0
  1071. data/tests/illformed/entities/yuml.xml +9 -0
  1072. data/tests/illformed/entities/zeta.xml +9 -0
  1073. data/tests/illformed/entities/zwj.xml +9 -0
  1074. data/tests/illformed/entities/zwnj.xml +9 -0
  1075. data/tests/illformed/itunes/itunes_channel_block.xml +9 -0
  1076. data/tests/illformed/itunes/itunes_channel_block_false.xml +9 -0
  1077. data/tests/illformed/itunes/itunes_channel_block_no.xml +9 -0
  1078. data/tests/illformed/itunes/itunes_channel_block_true.xml +9 -0
  1079. data/tests/illformed/itunes/itunes_channel_block_uppercase.xml +9 -0
  1080. data/tests/illformed/itunes/itunes_channel_block_whitespace.xml +9 -0
  1081. data/tests/illformed/itunes/itunes_channel_category.xml +9 -0
  1082. data/tests/illformed/itunes/itunes_channel_category_nested.xml +11 -0
  1083. data/tests/illformed/itunes/itunes_channel_category_scheme.xml +9 -0
  1084. data/tests/illformed/itunes/itunes_channel_explicit.xml +9 -0
  1085. data/tests/illformed/itunes/itunes_channel_explicit_false.xml +9 -0
  1086. data/tests/illformed/itunes/itunes_channel_explicit_no.xml +9 -0
  1087. data/tests/illformed/itunes/itunes_channel_explicit_true.xml +9 -0
  1088. data/tests/illformed/itunes/itunes_channel_explicit_uppercase.xml +9 -0
  1089. data/tests/illformed/itunes/itunes_channel_explicit_whitespace.xml +9 -0
  1090. data/tests/illformed/itunes/itunes_channel_image.xml +9 -0
  1091. data/tests/illformed/itunes/itunes_channel_keywords.xml +9 -0
  1092. data/tests/illformed/itunes/itunes_channel_keywords_duplicate.xml +9 -0
  1093. data/tests/illformed/itunes/itunes_channel_keywords_duplicate_2.xml +10 -0
  1094. data/tests/illformed/itunes/itunes_channel_keywords_multiple.xml +9 -0
  1095. data/tests/illformed/itunes/itunes_channel_link_image.xml +9 -0
  1096. data/tests/illformed/itunes/itunes_channel_owner_email.xml +12 -0
  1097. data/tests/illformed/itunes/itunes_channel_owner_name.xml +12 -0
  1098. data/tests/illformed/itunes/itunes_channel_subtitle.xml +9 -0
  1099. data/tests/illformed/itunes/itunes_channel_summary.xml +9 -0
  1100. data/tests/illformed/itunes/itunes_core_element_uppercase.xml +9 -0
  1101. data/tests/illformed/itunes/itunes_enclosure_url_maps_id.xml +11 -0
  1102. data/tests/illformed/itunes/itunes_enclosure_url_maps_id_2.xml +12 -0
  1103. data/tests/illformed/itunes/itunes_item_author_map_author.xml +11 -0
  1104. data/tests/illformed/itunes/itunes_item_block.xml +11 -0
  1105. data/tests/illformed/itunes/itunes_item_block_false.xml +11 -0
  1106. data/tests/illformed/itunes/itunes_item_block_no.xml +11 -0
  1107. data/tests/illformed/itunes/itunes_item_block_true.xml +11 -0
  1108. data/tests/illformed/itunes/itunes_item_block_uppercase.xml +11 -0
  1109. data/tests/illformed/itunes/itunes_item_block_whitespace.xml +11 -0
  1110. data/tests/illformed/itunes/itunes_item_category.xml +11 -0
  1111. data/tests/illformed/itunes/itunes_item_category_nested.xml +13 -0
  1112. data/tests/illformed/itunes/itunes_item_category_scheme.xml +11 -0
  1113. data/tests/illformed/itunes/itunes_item_duration.xml +11 -0
  1114. data/tests/illformed/itunes/itunes_item_explicit.xml +11 -0
  1115. data/tests/illformed/itunes/itunes_item_explicit_false.xml +11 -0
  1116. data/tests/illformed/itunes/itunes_item_explicit_no.xml +11 -0
  1117. data/tests/illformed/itunes/itunes_item_explicit_true.xml +11 -0
  1118. data/tests/illformed/itunes/itunes_item_explicit_uppercase.xml +11 -0
  1119. data/tests/illformed/itunes/itunes_item_explicit_whitespace.xml +11 -0
  1120. data/tests/illformed/itunes/itunes_item_image.xml +11 -0
  1121. data/tests/illformed/itunes/itunes_item_link_image.xml +11 -0
  1122. data/tests/illformed/itunes/itunes_item_subtitle.xml +11 -0
  1123. data/tests/illformed/itunes/itunes_item_summary.xml +11 -0
  1124. data/tests/illformed/itunes/itunes_namespace.xml +9 -0
  1125. data/tests/illformed/itunes/itunes_namespace_example.xml +9 -0
  1126. data/tests/illformed/itunes/itunes_namespace_lowercase.xml +9 -0
  1127. data/tests/illformed/itunes/itunes_namespace_uppercase.xml +9 -0
  1128. data/tests/illformed/lang/channel_dc_language.xml +9 -0
  1129. data/tests/illformed/lang/channel_language.xml +9 -0
  1130. data/tests/illformed/lang/entry_content_xml_lang.xml +9 -0
  1131. data/tests/illformed/lang/entry_content_xml_lang_blank.xml +9 -0
  1132. data/tests/illformed/lang/entry_content_xml_lang_blank_2.xml +9 -0
  1133. data/tests/illformed/lang/entry_content_xml_lang_blank_3.xml +12 -0
  1134. data/tests/illformed/lang/entry_content_xml_lang_inherit.xml +9 -0
  1135. data/tests/illformed/lang/entry_content_xml_lang_inherit_2.xml +9 -0
  1136. data/tests/illformed/lang/entry_content_xml_lang_inherit_3.xml +10 -0
  1137. data/tests/illformed/lang/entry_content_xml_lang_inherit_4.xml +10 -0
  1138. data/tests/illformed/lang/entry_summary_xml_lang.xml +9 -0
  1139. data/tests/illformed/lang/entry_summary_xml_lang_blank.xml +9 -0
  1140. data/tests/illformed/lang/entry_summary_xml_lang_inherit.xml +9 -0
  1141. data/tests/illformed/lang/entry_summary_xml_lang_inherit_2.xml +9 -0
  1142. data/tests/illformed/lang/entry_summary_xml_lang_inherit_3.xml +10 -0
  1143. data/tests/illformed/lang/entry_summary_xml_lang_inherit_4.xml +10 -0
  1144. data/tests/illformed/lang/entry_title_xml_lang.xml +9 -0
  1145. data/tests/illformed/lang/entry_title_xml_lang_blank.xml +9 -0
  1146. data/tests/illformed/lang/entry_title_xml_lang_inherit.xml +9 -0
  1147. data/tests/illformed/lang/entry_title_xml_lang_inherit_2.xml +9 -0
  1148. data/tests/illformed/lang/entry_title_xml_lang_inherit_3.xml +10 -0
  1149. data/tests/illformed/lang/entry_title_xml_lang_inherit_4.xml +10 -0
  1150. data/tests/illformed/lang/feed_copyright_xml_lang.xml +7 -0
  1151. data/tests/illformed/lang/feed_copyright_xml_lang_blank.xml +7 -0
  1152. data/tests/illformed/lang/feed_copyright_xml_lang_inherit.xml +7 -0
  1153. data/tests/illformed/lang/feed_copyright_xml_lang_inherit_2.xml +7 -0
  1154. data/tests/illformed/lang/feed_copyright_xml_lang_inherit_3.xml +8 -0
  1155. data/tests/illformed/lang/feed_copyright_xml_lang_inherit_4.xml +8 -0
  1156. data/tests/illformed/lang/feed_info_xml_lang.xml +7 -0
  1157. data/tests/illformed/lang/feed_info_xml_lang_blank.xml +7 -0
  1158. data/tests/illformed/lang/feed_info_xml_lang_inherit.xml +7 -0
  1159. data/tests/illformed/lang/feed_info_xml_lang_inherit_2.xml +7 -0
  1160. data/tests/illformed/lang/feed_info_xml_lang_inherit_3.xml +8 -0
  1161. data/tests/illformed/lang/feed_info_xml_lang_inherit_4.xml +8 -0
  1162. data/tests/illformed/lang/feed_language.xml +9 -0
  1163. data/tests/illformed/lang/feed_language_override.xml +9 -0
  1164. data/tests/illformed/lang/feed_not_xml_lang.xml +7 -0
  1165. data/tests/illformed/lang/feed_not_xml_lang_2.xml +7 -0
  1166. data/tests/illformed/lang/feed_tagline_xml_lang.xml +7 -0
  1167. data/tests/illformed/lang/feed_tagline_xml_lang_blank.xml +7 -0
  1168. data/tests/illformed/lang/feed_tagline_xml_lang_inherit.xml +7 -0
  1169. data/tests/illformed/lang/feed_tagline_xml_lang_inherit_2.xml +7 -0
  1170. data/tests/illformed/lang/feed_tagline_xml_lang_inherit_3.xml +8 -0
  1171. data/tests/illformed/lang/feed_tagline_xml_lang_inherit_4.xml +8 -0
  1172. data/tests/illformed/lang/feed_title_xml_lang.xml +7 -0
  1173. data/tests/illformed/lang/feed_title_xml_lang_blank.xml +7 -0
  1174. data/tests/illformed/lang/feed_title_xml_lang_inherit.xml +7 -0
  1175. data/tests/illformed/lang/feed_title_xml_lang_inherit_2.xml +7 -0
  1176. data/tests/illformed/lang/feed_title_xml_lang_inherit_3.xml +8 -0
  1177. data/tests/illformed/lang/feed_title_xml_lang_inherit_4.xml +8 -0
  1178. data/tests/illformed/lang/feed_xml_lang.xml +6 -0
  1179. data/tests/illformed/lang/http_content_language.xml +7 -0
  1180. data/tests/illformed/lang/http_content_language_entry_title_inherit.xml +10 -0
  1181. data/tests/illformed/lang/http_content_language_entry_title_inherit_2.xml +11 -0
  1182. data/tests/illformed/lang/http_content_language_feed_language.xml +10 -0
  1183. data/tests/illformed/lang/http_content_language_feed_xml_lang.xml +7 -0
  1184. data/tests/illformed/lang/item_content_encoded_xml_lang.xml +11 -0
  1185. data/tests/illformed/lang/item_content_encoded_xml_lang_inherit.xml +11 -0
  1186. data/tests/illformed/lang/item_dc_language.xml +11 -0
  1187. data/tests/illformed/lang/item_fullitem_xml_lang.xml +11 -0
  1188. data/tests/illformed/lang/item_fullitem_xml_lang_inherit.xml +11 -0
  1189. data/tests/illformed/lang/item_xhtml_body_xml_lang.xml +13 -0
  1190. data/tests/illformed/lang/item_xhtml_body_xml_lang_inherit.xml +13 -0
  1191. data/tests/illformed/namespace/rss1.0withModules.xml +47 -0
  1192. data/tests/illformed/namespace/rss1.0withModulesNoDefNS.xml +48 -0
  1193. data/tests/illformed/namespace/rss1.0withModulesNoDefNSLocalNameClash.xml +53 -0
  1194. data/tests/illformed/namespace/rss2.0NSwithModules.xml +50 -0
  1195. data/tests/illformed/namespace/rss2.0NSwithModulesNoDefNS.xml +50 -0
  1196. data/tests/illformed/namespace/rss2.0NSwithModulesNoDefNSLocalNameClash.xml +58 -0
  1197. data/tests/illformed/namespace/rss2.0noNSwithModules.xml +49 -0
  1198. data/tests/illformed/namespace/rss2.0noNSwithModulesLocalNameClash.xml +57 -0
  1199. data/tests/illformed/namespace/undeclared_namespace.xml +10 -0
  1200. data/tests/illformed/rdf/rdf_channel_description.xml +9 -0
  1201. data/tests/illformed/rdf/rdf_channel_empty_textinput.xml +26 -0
  1202. data/tests/illformed/rdf/rdf_channel_link.xml +9 -0
  1203. data/tests/illformed/rdf/rdf_channel_title.xml +9 -0
  1204. data/tests/illformed/rdf/rdf_item_description.xml +16 -0
  1205. data/tests/illformed/rdf/rdf_item_link.xml +16 -0
  1206. data/tests/illformed/rdf/rdf_item_rdf_about.xml +15 -0
  1207. data/tests/illformed/rdf/rdf_item_title.xml +16 -0
  1208. data/tests/illformed/rdf/rss090_channel_title.xml +12 -0
  1209. data/tests/illformed/rdf/rss090_item_title.xml +12 -0
  1210. data/tests/illformed/rdf/rss_version_10.xml +6 -0
  1211. data/tests/illformed/rdf/rss_version_10_not_default_ns.xml +8 -0
  1212. data/tests/illformed/rss/aaa_illformed.xml +6 -0
  1213. data/tests/illformed/rss/channel_author.xml +9 -0
  1214. data/tests/illformed/rss/channel_author_map_author_detail_email.xml +9 -0
  1215. data/tests/illformed/rss/channel_author_map_author_detail_email_2.xml +9 -0
  1216. data/tests/illformed/rss/channel_author_map_author_detail_email_3.xml +9 -0
  1217. data/tests/illformed/rss/channel_author_map_author_detail_name.xml +9 -0
  1218. data/tests/illformed/rss/channel_author_map_author_detail_name_2.xml +9 -0
  1219. data/tests/illformed/rss/channel_category.xml +9 -0
  1220. data/tests/illformed/rss/channel_category_domain.xml +9 -0
  1221. data/tests/illformed/rss/channel_category_multiple.xml +10 -0
  1222. data/tests/illformed/rss/channel_category_multiple_2.xml +10 -0
  1223. data/tests/illformed/rss/channel_cloud_domain.xml +9 -0
  1224. data/tests/illformed/rss/channel_cloud_path.xml +9 -0
  1225. data/tests/illformed/rss/channel_cloud_port.xml +9 -0
  1226. data/tests/illformed/rss/channel_cloud_protocol.xml +9 -0
  1227. data/tests/illformed/rss/channel_cloud_registerProcedure.xml +9 -0
  1228. data/tests/illformed/rss/channel_copyright.xml +9 -0
  1229. data/tests/illformed/rss/channel_dc_author.xml +9 -0
  1230. data/tests/illformed/rss/channel_dc_author_map_author_detail_email.xml +9 -0
  1231. data/tests/illformed/rss/channel_dc_author_map_author_detail_name.xml +9 -0
  1232. data/tests/illformed/rss/channel_dc_contributor.xml +9 -0
  1233. data/tests/illformed/rss/channel_dc_creator.xml +9 -0
  1234. data/tests/illformed/rss/channel_dc_creator_map_author_detail_email.xml +9 -0
  1235. data/tests/illformed/rss/channel_dc_creator_map_author_detail_name.xml +9 -0
  1236. data/tests/illformed/rss/channel_dc_publisher.xml +9 -0
  1237. data/tests/illformed/rss/channel_dc_publisher_email.xml +9 -0
  1238. data/tests/illformed/rss/channel_dc_publisher_name.xml +9 -0
  1239. data/tests/illformed/rss/channel_dc_rights.xml +9 -0
  1240. data/tests/illformed/rss/channel_dc_subject.xml +9 -0
  1241. data/tests/illformed/rss/channel_dc_subject_2.xml +9 -0
  1242. data/tests/illformed/rss/channel_dc_subject_multiple.xml +10 -0
  1243. data/tests/illformed/rss/channel_dc_title.xml +9 -0
  1244. data/tests/illformed/rss/channel_description.xml +9 -0
  1245. data/tests/illformed/rss/channel_description_escaped_markup.xml +9 -0
  1246. data/tests/illformed/rss/channel_description_map_tagline.xml +9 -0
  1247. data/tests/illformed/rss/channel_description_naked_markup.xml +9 -0
  1248. data/tests/illformed/rss/channel_description_shorttag.xml +10 -0
  1249. data/tests/illformed/rss/channel_docs.xml +9 -0
  1250. data/tests/illformed/rss/channel_generator.xml +9 -0
  1251. data/tests/illformed/rss/channel_image_description.xml +16 -0
  1252. data/tests/illformed/rss/channel_image_height.xml +16 -0
  1253. data/tests/illformed/rss/channel_image_link.xml +16 -0
  1254. data/tests/illformed/rss/channel_image_link_conflict.xml +12 -0
  1255. data/tests/illformed/rss/channel_image_title.xml +16 -0
  1256. data/tests/illformed/rss/channel_image_title_conflict.xml +12 -0
  1257. data/tests/illformed/rss/channel_image_url.xml +16 -0
  1258. data/tests/illformed/rss/channel_image_width.xml +16 -0
  1259. data/tests/illformed/rss/channel_link.xml +9 -0
  1260. data/tests/illformed/rss/channel_managingEditor.xml +9 -0
  1261. data/tests/illformed/rss/channel_managingEditor_map_author_detail_email.xml +9 -0
  1262. data/tests/illformed/rss/channel_managingEditor_map_author_detail_name.xml +9 -0
  1263. data/tests/illformed/rss/channel_textInput_description.xml +14 -0
  1264. data/tests/illformed/rss/channel_textInput_description_conflict.xml +12 -0
  1265. data/tests/illformed/rss/channel_textInput_link.xml +12 -0
  1266. data/tests/illformed/rss/channel_textInput_link_conflict.xml +12 -0
  1267. data/tests/illformed/rss/channel_textInput_name.xml +11 -0
  1268. data/tests/illformed/rss/channel_textInput_title.xml +12 -0
  1269. data/tests/illformed/rss/channel_textInput_title_conflict.xml +12 -0
  1270. data/tests/illformed/rss/channel_title.xml +9 -0
  1271. data/tests/illformed/rss/channel_title_apos.xml +9 -0
  1272. data/tests/illformed/rss/channel_title_gt.xml +9 -0
  1273. data/tests/illformed/rss/channel_title_lt.xml +9 -0
  1274. data/tests/illformed/rss/channel_ttl.xml +9 -0
  1275. data/tests/illformed/rss/channel_webMaster.xml +9 -0
  1276. data/tests/illformed/rss/channel_webMaster_email.xml +9 -0
  1277. data/tests/illformed/rss/channel_webMaster_name.xml +9 -0
  1278. data/tests/illformed/rss/item_author.xml +11 -0
  1279. data/tests/illformed/rss/item_author_map_author_detail_email.xml +11 -0
  1280. data/tests/illformed/rss/item_author_map_author_detail_name.xml +11 -0
  1281. data/tests/illformed/rss/item_category.xml +11 -0
  1282. data/tests/illformed/rss/item_category_domain.xml +11 -0
  1283. data/tests/illformed/rss/item_category_multiple.xml +12 -0
  1284. data/tests/illformed/rss/item_category_multiple_2.xml +12 -0
  1285. data/tests/illformed/rss/item_comments.xml +11 -0
  1286. data/tests/illformed/rss/item_content_encoded.xml +11 -0
  1287. data/tests/illformed/rss/item_content_encoded_mode.xml +11 -0
  1288. data/tests/illformed/rss/item_content_encoded_type.xml +11 -0
  1289. data/tests/illformed/rss/item_dc_author.xml +11 -0
  1290. data/tests/illformed/rss/item_dc_author_map_author_detail_email.xml +11 -0
  1291. data/tests/illformed/rss/item_dc_author_map_author_detail_name.xml +11 -0
  1292. data/tests/illformed/rss/item_dc_contributor.xml +11 -0
  1293. data/tests/illformed/rss/item_dc_creator.xml +11 -0
  1294. data/tests/illformed/rss/item_dc_creator_map_author_detail_email.xml +11 -0
  1295. data/tests/illformed/rss/item_dc_creator_map_author_detail_name.xml +11 -0
  1296. data/tests/illformed/rss/item_dc_publisher.xml +11 -0
  1297. data/tests/illformed/rss/item_dc_publisher_email.xml +11 -0
  1298. data/tests/illformed/rss/item_dc_publisher_name.xml +11 -0
  1299. data/tests/illformed/rss/item_dc_rights.xml +11 -0
  1300. data/tests/illformed/rss/item_dc_subject.xml +11 -0
  1301. data/tests/illformed/rss/item_dc_subject_2.xml +11 -0
  1302. data/tests/illformed/rss/item_dc_subject_multiple.xml +12 -0
  1303. data/tests/illformed/rss/item_dc_title.xml +11 -0
  1304. data/tests/illformed/rss/item_description.xml +11 -0
  1305. data/tests/illformed/rss/item_description_and_summary.xml +12 -0
  1306. data/tests/illformed/rss/item_description_br.xml +11 -0
  1307. data/tests/illformed/rss/item_description_br_shorttag.xml +12 -0
  1308. data/tests/illformed/rss/item_description_escaped_markup.xml +11 -0
  1309. data/tests/illformed/rss/item_description_map_summary.xml +11 -0
  1310. data/tests/illformed/rss/item_description_naked_markup.xml +11 -0
  1311. data/tests/illformed/rss/item_description_not_a_doctype.xml +9 -0
  1312. data/tests/illformed/rss/item_enclosure_length.xml +12 -0
  1313. data/tests/illformed/rss/item_enclosure_multiple.xml +13 -0
  1314. data/tests/illformed/rss/item_enclosure_type.xml +12 -0
  1315. data/tests/illformed/rss/item_enclosure_url.xml +12 -0
  1316. data/tests/illformed/rss/item_fullitem.xml +11 -0
  1317. data/tests/illformed/rss/item_fullitem_mode.xml +11 -0
  1318. data/tests/illformed/rss/item_fullitem_type.xml +11 -0
  1319. data/tests/illformed/rss/item_guid.xml +11 -0
  1320. data/tests/illformed/rss/item_guid_conflict_link.xml +12 -0
  1321. data/tests/illformed/rss/item_guid_guidislink.xml +11 -0
  1322. data/tests/illformed/rss/item_guid_isPermaLink_conflict_link.xml +12 -0
  1323. data/tests/illformed/rss/item_guid_isPermaLink_conflict_link_not_guidislink.xml +12 -0
  1324. data/tests/illformed/rss/item_guid_isPermaLink_guidislink.xml +11 -0
  1325. data/tests/illformed/rss/item_guid_isPermaLink_map_link.xml +11 -0
  1326. data/tests/illformed/rss/item_guid_map_link.xml +11 -0
  1327. data/tests/illformed/rss/item_guid_not_permalink.xml +11 -0
  1328. data/tests/illformed/rss/item_guid_not_permalink_conflict_link.xml +12 -0
  1329. data/tests/illformed/rss/item_guid_not_permalink_not_guidislink.xml +11 -0
  1330. data/tests/illformed/rss/item_guid_not_permalink_not_guidislink_2.xml +12 -0
  1331. data/tests/illformed/rss/item_link.xml +11 -0
  1332. data/tests/illformed/rss/item_source.xml +12 -0
  1333. data/tests/illformed/rss/item_source_url.xml +12 -0
  1334. data/tests/illformed/rss/item_summary_and_description.xml +12 -0
  1335. data/tests/illformed/rss/item_title.xml +11 -0
  1336. data/tests/illformed/rss/item_xhtml_body.xml +13 -0
  1337. data/tests/illformed/rss/item_xhtml_body_mode.xml +13 -0
  1338. data/tests/illformed/rss/item_xhtml_body_type.xml +13 -0
  1339. data/tests/illformed/rss/rss_namespace_1.xml +9 -0
  1340. data/tests/illformed/rss/rss_namespace_2.xml +9 -0
  1341. data/tests/illformed/rss/rss_namespace_3.xml +9 -0
  1342. data/tests/illformed/rss/rss_namespace_4.xml +9 -0
  1343. data/tests/illformed/rss/rss_version_090.xml +6 -0
  1344. data/tests/illformed/rss/rss_version_091_netscape.xml +7 -0
  1345. data/tests/illformed/rss/rss_version_092.xml +6 -0
  1346. data/tests/illformed/rss/rss_version_093.xml +6 -0
  1347. data/tests/illformed/rss/rss_version_094.xml +6 -0
  1348. data/tests/illformed/rss/rss_version_20.xml +6 -0
  1349. data/tests/illformed/rss/rss_version_201.xml +6 -0
  1350. data/tests/illformed/rss/rss_version_21.xml +6 -0
  1351. data/tests/illformed/rss/rss_version_missing.xml +9 -0
  1352. data/tests/illformed/sanitize/entry_content_applet.xml +9 -0
  1353. data/tests/illformed/sanitize/entry_content_blink.xml +9 -0
  1354. data/tests/illformed/sanitize/entry_content_crazy.xml +75 -0
  1355. data/tests/illformed/sanitize/entry_content_embed.xml +9 -0
  1356. data/tests/illformed/sanitize/entry_content_frame.xml +9 -0
  1357. data/tests/illformed/sanitize/entry_content_iframe.xml +9 -0
  1358. data/tests/illformed/sanitize/entry_content_link.xml +9 -0
  1359. data/tests/illformed/sanitize/entry_content_meta.xml +9 -0
  1360. data/tests/illformed/sanitize/entry_content_object.xml +9 -0
  1361. data/tests/illformed/sanitize/entry_content_onabort.xml +9 -0
  1362. data/tests/illformed/sanitize/entry_content_onblur.xml +9 -0
  1363. data/tests/illformed/sanitize/entry_content_onchange.xml +9 -0
  1364. data/tests/illformed/sanitize/entry_content_onclick.xml +9 -0
  1365. data/tests/illformed/sanitize/entry_content_ondblclick.xml +9 -0
  1366. data/tests/illformed/sanitize/entry_content_onerror.xml +9 -0
  1367. data/tests/illformed/sanitize/entry_content_onfocus.xml +9 -0
  1368. data/tests/illformed/sanitize/entry_content_onkeydown.xml +9 -0
  1369. data/tests/illformed/sanitize/entry_content_onkeypress.xml +9 -0
  1370. data/tests/illformed/sanitize/entry_content_onkeyup.xml +9 -0
  1371. data/tests/illformed/sanitize/entry_content_onload.xml +9 -0
  1372. data/tests/illformed/sanitize/entry_content_onmousedown.xml +9 -0
  1373. data/tests/illformed/sanitize/entry_content_onmouseout.xml +9 -0
  1374. data/tests/illformed/sanitize/entry_content_onmouseover.xml +9 -0
  1375. data/tests/illformed/sanitize/entry_content_onmouseup.xml +9 -0
  1376. data/tests/illformed/sanitize/entry_content_onreset.xml +9 -0
  1377. data/tests/illformed/sanitize/entry_content_onresize.xml +9 -0
  1378. data/tests/illformed/sanitize/entry_content_onsubmit.xml +9 -0
  1379. data/tests/illformed/sanitize/entry_content_onunload.xml +9 -0
  1380. data/tests/illformed/sanitize/entry_content_script.xml +9 -0
  1381. data/tests/illformed/sanitize/entry_content_script_base64.xml +12 -0
  1382. data/tests/illformed/sanitize/entry_content_script_cdata.xml +9 -0
  1383. data/tests/illformed/sanitize/entry_content_script_inline.xml +9 -0
  1384. data/tests/illformed/sanitize/entry_content_style.xml +9 -0
  1385. data/tests/illformed/sanitize/entry_summary_applet.xml +9 -0
  1386. data/tests/illformed/sanitize/entry_summary_blink.xml +9 -0
  1387. data/tests/illformed/sanitize/entry_summary_crazy.xml +75 -0
  1388. data/tests/illformed/sanitize/entry_summary_embed.xml +9 -0
  1389. data/tests/illformed/sanitize/entry_summary_frame.xml +9 -0
  1390. data/tests/illformed/sanitize/entry_summary_iframe.xml +9 -0
  1391. data/tests/illformed/sanitize/entry_summary_link.xml +9 -0
  1392. data/tests/illformed/sanitize/entry_summary_meta.xml +9 -0
  1393. data/tests/illformed/sanitize/entry_summary_object.xml +9 -0
  1394. data/tests/illformed/sanitize/entry_summary_onabort.xml +9 -0
  1395. data/tests/illformed/sanitize/entry_summary_onblur.xml +9 -0
  1396. data/tests/illformed/sanitize/entry_summary_onchange.xml +9 -0
  1397. data/tests/illformed/sanitize/entry_summary_onclick.xml +9 -0
  1398. data/tests/illformed/sanitize/entry_summary_ondblclick.xml +9 -0
  1399. data/tests/illformed/sanitize/entry_summary_onerror.xml +9 -0
  1400. data/tests/illformed/sanitize/entry_summary_onfocus.xml +9 -0
  1401. data/tests/illformed/sanitize/entry_summary_onkeydown.xml +9 -0
  1402. data/tests/illformed/sanitize/entry_summary_onkeypress.xml +9 -0
  1403. data/tests/illformed/sanitize/entry_summary_onkeyup.xml +9 -0
  1404. data/tests/illformed/sanitize/entry_summary_onload.xml +9 -0
  1405. data/tests/illformed/sanitize/entry_summary_onmousedown.xml +9 -0
  1406. data/tests/illformed/sanitize/entry_summary_onmouseout.xml +9 -0
  1407. data/tests/illformed/sanitize/entry_summary_onmouseover.xml +9 -0
  1408. data/tests/illformed/sanitize/entry_summary_onmouseup.xml +9 -0
  1409. data/tests/illformed/sanitize/entry_summary_onreset.xml +9 -0
  1410. data/tests/illformed/sanitize/entry_summary_onresize.xml +9 -0
  1411. data/tests/illformed/sanitize/entry_summary_onsubmit.xml +9 -0
  1412. data/tests/illformed/sanitize/entry_summary_onunload.xml +9 -0
  1413. data/tests/illformed/sanitize/entry_summary_script.xml +9 -0
  1414. data/tests/illformed/sanitize/entry_summary_script_base64.xml +12 -0
  1415. data/tests/illformed/sanitize/entry_summary_script_cdata.xml +9 -0
  1416. data/tests/illformed/sanitize/entry_summary_script_inline.xml +9 -0
  1417. data/tests/illformed/sanitize/entry_summary_script_map_description.xml +9 -0
  1418. data/tests/illformed/sanitize/entry_summary_style.xml +9 -0
  1419. data/tests/illformed/sanitize/entry_title_applet.xml +9 -0
  1420. data/tests/illformed/sanitize/entry_title_blink.xml +9 -0
  1421. data/tests/illformed/sanitize/entry_title_crazy.xml +75 -0
  1422. data/tests/illformed/sanitize/entry_title_embed.xml +9 -0
  1423. data/tests/illformed/sanitize/entry_title_frame.xml +9 -0
  1424. data/tests/illformed/sanitize/entry_title_iframe.xml +9 -0
  1425. data/tests/illformed/sanitize/entry_title_link.xml +9 -0
  1426. data/tests/illformed/sanitize/entry_title_meta.xml +9 -0
  1427. data/tests/illformed/sanitize/entry_title_object.xml +9 -0
  1428. data/tests/illformed/sanitize/entry_title_onabort.xml +9 -0
  1429. data/tests/illformed/sanitize/entry_title_onblur.xml +9 -0
  1430. data/tests/illformed/sanitize/entry_title_onchange.xml +9 -0
  1431. data/tests/illformed/sanitize/entry_title_onclick.xml +9 -0
  1432. data/tests/illformed/sanitize/entry_title_ondblclick.xml +9 -0
  1433. data/tests/illformed/sanitize/entry_title_onerror.xml +9 -0
  1434. data/tests/illformed/sanitize/entry_title_onfocus.xml +9 -0
  1435. data/tests/illformed/sanitize/entry_title_onkeydown.xml +9 -0
  1436. data/tests/illformed/sanitize/entry_title_onkeypress.xml +9 -0
  1437. data/tests/illformed/sanitize/entry_title_onkeyup.xml +9 -0
  1438. data/tests/illformed/sanitize/entry_title_onload.xml +9 -0
  1439. data/tests/illformed/sanitize/entry_title_onmousedown.xml +9 -0
  1440. data/tests/illformed/sanitize/entry_title_onmouseout.xml +9 -0
  1441. data/tests/illformed/sanitize/entry_title_onmouseover.xml +9 -0
  1442. data/tests/illformed/sanitize/entry_title_onmouseup.xml +9 -0
  1443. data/tests/illformed/sanitize/entry_title_onreset.xml +9 -0
  1444. data/tests/illformed/sanitize/entry_title_onresize.xml +9 -0
  1445. data/tests/illformed/sanitize/entry_title_onsubmit.xml +9 -0
  1446. data/tests/illformed/sanitize/entry_title_onunload.xml +9 -0
  1447. data/tests/illformed/sanitize/entry_title_script.xml +9 -0
  1448. data/tests/illformed/sanitize/entry_title_script_cdata.xml +9 -0
  1449. data/tests/illformed/sanitize/entry_title_script_inline.xml +9 -0
  1450. data/tests/illformed/sanitize/entry_title_style.xml +9 -0
  1451. data/tests/illformed/sanitize/feed_copyright_applet.xml +7 -0
  1452. data/tests/illformed/sanitize/feed_copyright_blink.xml +7 -0
  1453. data/tests/illformed/sanitize/feed_copyright_crazy.xml +73 -0
  1454. data/tests/illformed/sanitize/feed_copyright_embed.xml +7 -0
  1455. data/tests/illformed/sanitize/feed_copyright_frame.xml +7 -0
  1456. data/tests/illformed/sanitize/feed_copyright_iframe.xml +7 -0
  1457. data/tests/illformed/sanitize/feed_copyright_link.xml +7 -0
  1458. data/tests/illformed/sanitize/feed_copyright_meta.xml +7 -0
  1459. data/tests/illformed/sanitize/feed_copyright_object.xml +7 -0
  1460. data/tests/illformed/sanitize/feed_copyright_onabort.xml +7 -0
  1461. data/tests/illformed/sanitize/feed_copyright_onblur.xml +7 -0
  1462. data/tests/illformed/sanitize/feed_copyright_onchange.xml +7 -0
  1463. data/tests/illformed/sanitize/feed_copyright_onclick.xml +7 -0
  1464. data/tests/illformed/sanitize/feed_copyright_ondblclick.xml +7 -0
  1465. data/tests/illformed/sanitize/feed_copyright_onerror.xml +7 -0
  1466. data/tests/illformed/sanitize/feed_copyright_onfocus.xml +7 -0
  1467. data/tests/illformed/sanitize/feed_copyright_onkeydown.xml +7 -0
  1468. data/tests/illformed/sanitize/feed_copyright_onkeypress.xml +7 -0
  1469. data/tests/illformed/sanitize/feed_copyright_onkeyup.xml +7 -0
  1470. data/tests/illformed/sanitize/feed_copyright_onload.xml +7 -0
  1471. data/tests/illformed/sanitize/feed_copyright_onmousedown.xml +7 -0
  1472. data/tests/illformed/sanitize/feed_copyright_onmouseout.xml +7 -0
  1473. data/tests/illformed/sanitize/feed_copyright_onmouseover.xml +7 -0
  1474. data/tests/illformed/sanitize/feed_copyright_onmouseup.xml +7 -0
  1475. data/tests/illformed/sanitize/feed_copyright_onreset.xml +7 -0
  1476. data/tests/illformed/sanitize/feed_copyright_onresize.xml +7 -0
  1477. data/tests/illformed/sanitize/feed_copyright_onsubmit.xml +7 -0
  1478. data/tests/illformed/sanitize/feed_copyright_onunload.xml +7 -0
  1479. data/tests/illformed/sanitize/feed_copyright_script.xml +7 -0
  1480. data/tests/illformed/sanitize/feed_copyright_script_cdata.xml +7 -0
  1481. data/tests/illformed/sanitize/feed_copyright_script_inline.xml +7 -0
  1482. data/tests/illformed/sanitize/feed_copyright_style.xml +7 -0
  1483. data/tests/illformed/sanitize/feed_info_applet.xml +7 -0
  1484. data/tests/illformed/sanitize/feed_info_blink.xml +7 -0
  1485. data/tests/illformed/sanitize/feed_info_crazy.xml +73 -0
  1486. data/tests/illformed/sanitize/feed_info_embed.xml +7 -0
  1487. data/tests/illformed/sanitize/feed_info_frame.xml +7 -0
  1488. data/tests/illformed/sanitize/feed_info_iframe.xml +7 -0
  1489. data/tests/illformed/sanitize/feed_info_link.xml +7 -0
  1490. data/tests/illformed/sanitize/feed_info_meta.xml +7 -0
  1491. data/tests/illformed/sanitize/feed_info_object.xml +7 -0
  1492. data/tests/illformed/sanitize/feed_info_onabort.xml +7 -0
  1493. data/tests/illformed/sanitize/feed_info_onblur.xml +7 -0
  1494. data/tests/illformed/sanitize/feed_info_onchange.xml +7 -0
  1495. data/tests/illformed/sanitize/feed_info_onclick.xml +7 -0
  1496. data/tests/illformed/sanitize/feed_info_ondblclick.xml +7 -0
  1497. data/tests/illformed/sanitize/feed_info_onerror.xml +7 -0
  1498. data/tests/illformed/sanitize/feed_info_onfocus.xml +7 -0
  1499. data/tests/illformed/sanitize/feed_info_onkeydown.xml +7 -0
  1500. data/tests/illformed/sanitize/feed_info_onkeypress.xml +7 -0
  1501. data/tests/illformed/sanitize/feed_info_onkeyup.xml +7 -0
  1502. data/tests/illformed/sanitize/feed_info_onload.xml +7 -0
  1503. data/tests/illformed/sanitize/feed_info_onmousedown.xml +7 -0
  1504. data/tests/illformed/sanitize/feed_info_onmouseout.xml +7 -0
  1505. data/tests/illformed/sanitize/feed_info_onmouseover.xml +7 -0
  1506. data/tests/illformed/sanitize/feed_info_onmouseup.xml +7 -0
  1507. data/tests/illformed/sanitize/feed_info_onreset.xml +7 -0
  1508. data/tests/illformed/sanitize/feed_info_onresize.xml +7 -0
  1509. data/tests/illformed/sanitize/feed_info_onsubmit.xml +7 -0
  1510. data/tests/illformed/sanitize/feed_info_onunload.xml +7 -0
  1511. data/tests/illformed/sanitize/feed_info_script.xml +7 -0
  1512. data/tests/illformed/sanitize/feed_info_script_cdata.xml +7 -0
  1513. data/tests/illformed/sanitize/feed_info_script_inline.xml +7 -0
  1514. data/tests/illformed/sanitize/feed_info_style.xml +7 -0
  1515. data/tests/illformed/sanitize/feed_subtitle_applet.xml +7 -0
  1516. data/tests/illformed/sanitize/feed_subtitle_blink.xml +7 -0
  1517. data/tests/illformed/sanitize/feed_subtitle_crazy.xml +73 -0
  1518. data/tests/illformed/sanitize/feed_subtitle_embed.xml +7 -0
  1519. data/tests/illformed/sanitize/feed_subtitle_frame.xml +7 -0
  1520. data/tests/illformed/sanitize/feed_subtitle_iframe.xml +7 -0
  1521. data/tests/illformed/sanitize/feed_subtitle_link.xml +7 -0
  1522. data/tests/illformed/sanitize/feed_subtitle_meta.xml +7 -0
  1523. data/tests/illformed/sanitize/feed_subtitle_object.xml +7 -0
  1524. data/tests/illformed/sanitize/feed_subtitle_onabort.xml +7 -0
  1525. data/tests/illformed/sanitize/feed_subtitle_onblur.xml +7 -0
  1526. data/tests/illformed/sanitize/feed_subtitle_onchange.xml +7 -0
  1527. data/tests/illformed/sanitize/feed_subtitle_onclick.xml +7 -0
  1528. data/tests/illformed/sanitize/feed_subtitle_ondblclick.xml +7 -0
  1529. data/tests/illformed/sanitize/feed_subtitle_onerror.xml +7 -0
  1530. data/tests/illformed/sanitize/feed_subtitle_onfocus.xml +7 -0
  1531. data/tests/illformed/sanitize/feed_subtitle_onkeydown.xml +7 -0
  1532. data/tests/illformed/sanitize/feed_subtitle_onkeypress.xml +7 -0
  1533. data/tests/illformed/sanitize/feed_subtitle_onkeyup.xml +7 -0
  1534. data/tests/illformed/sanitize/feed_subtitle_onload.xml +7 -0
  1535. data/tests/illformed/sanitize/feed_subtitle_onmousedown.xml +7 -0
  1536. data/tests/illformed/sanitize/feed_subtitle_onmouseout.xml +7 -0
  1537. data/tests/illformed/sanitize/feed_subtitle_onmouseover.xml +7 -0
  1538. data/tests/illformed/sanitize/feed_subtitle_onmouseup.xml +7 -0
  1539. data/tests/illformed/sanitize/feed_subtitle_onreset.xml +7 -0
  1540. data/tests/illformed/sanitize/feed_subtitle_onresize.xml +7 -0
  1541. data/tests/illformed/sanitize/feed_subtitle_onsubmit.xml +7 -0
  1542. data/tests/illformed/sanitize/feed_subtitle_onunload.xml +7 -0
  1543. data/tests/illformed/sanitize/feed_subtitle_script.xml +7 -0
  1544. data/tests/illformed/sanitize/feed_subtitle_script_cdata.xml +7 -0
  1545. data/tests/illformed/sanitize/feed_subtitle_script_inline.xml +7 -0
  1546. data/tests/illformed/sanitize/feed_subtitle_style.xml +7 -0
  1547. data/tests/illformed/sanitize/feed_tagline_applet.xml +7 -0
  1548. data/tests/illformed/sanitize/feed_tagline_blink.xml +7 -0
  1549. data/tests/illformed/sanitize/feed_tagline_crazy.xml +73 -0
  1550. data/tests/illformed/sanitize/feed_tagline_embed.xml +7 -0
  1551. data/tests/illformed/sanitize/feed_tagline_frame.xml +7 -0
  1552. data/tests/illformed/sanitize/feed_tagline_iframe.xml +7 -0
  1553. data/tests/illformed/sanitize/feed_tagline_link.xml +7 -0
  1554. data/tests/illformed/sanitize/feed_tagline_meta.xml +7 -0
  1555. data/tests/illformed/sanitize/feed_tagline_object.xml +7 -0
  1556. data/tests/illformed/sanitize/feed_tagline_onabort.xml +7 -0
  1557. data/tests/illformed/sanitize/feed_tagline_onblur.xml +7 -0
  1558. data/tests/illformed/sanitize/feed_tagline_onchange.xml +7 -0
  1559. data/tests/illformed/sanitize/feed_tagline_onclick.xml +7 -0
  1560. data/tests/illformed/sanitize/feed_tagline_ondblclick.xml +7 -0
  1561. data/tests/illformed/sanitize/feed_tagline_onerror.xml +7 -0
  1562. data/tests/illformed/sanitize/feed_tagline_onfocus.xml +7 -0
  1563. data/tests/illformed/sanitize/feed_tagline_onkeydown.xml +7 -0
  1564. data/tests/illformed/sanitize/feed_tagline_onkeypress.xml +7 -0
  1565. data/tests/illformed/sanitize/feed_tagline_onkeyup.xml +7 -0
  1566. data/tests/illformed/sanitize/feed_tagline_onload.xml +7 -0
  1567. data/tests/illformed/sanitize/feed_tagline_onmousedown.xml +7 -0
  1568. data/tests/illformed/sanitize/feed_tagline_onmouseout.xml +7 -0
  1569. data/tests/illformed/sanitize/feed_tagline_onmouseover.xml +7 -0
  1570. data/tests/illformed/sanitize/feed_tagline_onmouseup.xml +7 -0
  1571. data/tests/illformed/sanitize/feed_tagline_onreset.xml +7 -0
  1572. data/tests/illformed/sanitize/feed_tagline_onresize.xml +7 -0
  1573. data/tests/illformed/sanitize/feed_tagline_onsubmit.xml +7 -0
  1574. data/tests/illformed/sanitize/feed_tagline_onunload.xml +7 -0
  1575. data/tests/illformed/sanitize/feed_tagline_script.xml +7 -0
  1576. data/tests/illformed/sanitize/feed_tagline_script_cdata.xml +7 -0
  1577. data/tests/illformed/sanitize/feed_tagline_script_inline.xml +7 -0
  1578. data/tests/illformed/sanitize/feed_tagline_script_map_description.xml +7 -0
  1579. data/tests/illformed/sanitize/feed_tagline_style.xml +7 -0
  1580. data/tests/illformed/sanitize/feed_title_applet.xml +7 -0
  1581. data/tests/illformed/sanitize/feed_title_blink.xml +7 -0
  1582. data/tests/illformed/sanitize/feed_title_crazy.xml +73 -0
  1583. data/tests/illformed/sanitize/feed_title_embed.xml +7 -0
  1584. data/tests/illformed/sanitize/feed_title_frame.xml +7 -0
  1585. data/tests/illformed/sanitize/feed_title_iframe.xml +7 -0
  1586. data/tests/illformed/sanitize/feed_title_link.xml +7 -0
  1587. data/tests/illformed/sanitize/feed_title_meta.xml +7 -0
  1588. data/tests/illformed/sanitize/feed_title_object.xml +7 -0
  1589. data/tests/illformed/sanitize/feed_title_onabort.xml +7 -0
  1590. data/tests/illformed/sanitize/feed_title_onblur.xml +7 -0
  1591. data/tests/illformed/sanitize/feed_title_onchange.xml +7 -0
  1592. data/tests/illformed/sanitize/feed_title_onclick.xml +7 -0
  1593. data/tests/illformed/sanitize/feed_title_ondblclick.xml +7 -0
  1594. data/tests/illformed/sanitize/feed_title_onerror.xml +7 -0
  1595. data/tests/illformed/sanitize/feed_title_onfocus.xml +7 -0
  1596. data/tests/illformed/sanitize/feed_title_onkeydown.xml +7 -0
  1597. data/tests/illformed/sanitize/feed_title_onkeypress.xml +7 -0
  1598. data/tests/illformed/sanitize/feed_title_onkeyup.xml +7 -0
  1599. data/tests/illformed/sanitize/feed_title_onload.xml +7 -0
  1600. data/tests/illformed/sanitize/feed_title_onmousedown.xml +7 -0
  1601. data/tests/illformed/sanitize/feed_title_onmouseout.xml +7 -0
  1602. data/tests/illformed/sanitize/feed_title_onmouseover.xml +7 -0
  1603. data/tests/illformed/sanitize/feed_title_onmouseup.xml +7 -0
  1604. data/tests/illformed/sanitize/feed_title_onreset.xml +7 -0
  1605. data/tests/illformed/sanitize/feed_title_onresize.xml +7 -0
  1606. data/tests/illformed/sanitize/feed_title_onsubmit.xml +7 -0
  1607. data/tests/illformed/sanitize/feed_title_onunload.xml +7 -0
  1608. data/tests/illformed/sanitize/feed_title_script.xml +7 -0
  1609. data/tests/illformed/sanitize/feed_title_script_cdata.xml +7 -0
  1610. data/tests/illformed/sanitize/feed_title_script_inline.xml +7 -0
  1611. data/tests/illformed/sanitize/feed_title_style.xml +7 -0
  1612. data/tests/illformed/sanitize/item_body_applet.xml +11 -0
  1613. data/tests/illformed/sanitize/item_body_blink.xml +11 -0
  1614. data/tests/illformed/sanitize/item_body_embed.xml +11 -0
  1615. data/tests/illformed/sanitize/item_body_frame.xml +11 -0
  1616. data/tests/illformed/sanitize/item_body_iframe.xml +11 -0
  1617. data/tests/illformed/sanitize/item_body_link.xml +11 -0
  1618. data/tests/illformed/sanitize/item_body_meta.xml +11 -0
  1619. data/tests/illformed/sanitize/item_body_object.xml +11 -0
  1620. data/tests/illformed/sanitize/item_body_onabort.xml +11 -0
  1621. data/tests/illformed/sanitize/item_body_onblur.xml +11 -0
  1622. data/tests/illformed/sanitize/item_body_onchange.xml +11 -0
  1623. data/tests/illformed/sanitize/item_body_onclick.xml +11 -0
  1624. data/tests/illformed/sanitize/item_body_ondblclick.xml +11 -0
  1625. data/tests/illformed/sanitize/item_body_onerror.xml +11 -0
  1626. data/tests/illformed/sanitize/item_body_onfocus.xml +11 -0
  1627. data/tests/illformed/sanitize/item_body_onkeydown.xml +11 -0
  1628. data/tests/illformed/sanitize/item_body_onkeypress.xml +11 -0
  1629. data/tests/illformed/sanitize/item_body_onkeyup.xml +11 -0
  1630. data/tests/illformed/sanitize/item_body_onload.xml +11 -0
  1631. data/tests/illformed/sanitize/item_body_onmousedown.xml +11 -0
  1632. data/tests/illformed/sanitize/item_body_onmouseout.xml +11 -0
  1633. data/tests/illformed/sanitize/item_body_onmouseover.xml +11 -0
  1634. data/tests/illformed/sanitize/item_body_onmouseup.xml +11 -0
  1635. data/tests/illformed/sanitize/item_body_onreset.xml +11 -0
  1636. data/tests/illformed/sanitize/item_body_onresize.xml +11 -0
  1637. data/tests/illformed/sanitize/item_body_onsubmit.xml +11 -0
  1638. data/tests/illformed/sanitize/item_body_onunload.xml +11 -0
  1639. data/tests/illformed/sanitize/item_body_script.xml +11 -0
  1640. data/tests/illformed/sanitize/item_body_script_map_content.xml +11 -0
  1641. data/tests/illformed/sanitize/item_body_style.xml +11 -0
  1642. data/tests/illformed/sanitize/item_content_encoded_applet.xml +11 -0
  1643. data/tests/illformed/sanitize/item_content_encoded_blink.xml +11 -0
  1644. data/tests/illformed/sanitize/item_content_encoded_crazy.xml +77 -0
  1645. data/tests/illformed/sanitize/item_content_encoded_embed.xml +11 -0
  1646. data/tests/illformed/sanitize/item_content_encoded_frame.xml +11 -0
  1647. data/tests/illformed/sanitize/item_content_encoded_iframe.xml +11 -0
  1648. data/tests/illformed/sanitize/item_content_encoded_link.xml +11 -0
  1649. data/tests/illformed/sanitize/item_content_encoded_map_content.xml +11 -0
  1650. data/tests/illformed/sanitize/item_content_encoded_meta.xml +11 -0
  1651. data/tests/illformed/sanitize/item_content_encoded_object.xml +11 -0
  1652. data/tests/illformed/sanitize/item_content_encoded_onabort.xml +11 -0
  1653. data/tests/illformed/sanitize/item_content_encoded_onblur.xml +11 -0
  1654. data/tests/illformed/sanitize/item_content_encoded_onchange.xml +11 -0
  1655. data/tests/illformed/sanitize/item_content_encoded_onclick.xml +11 -0
  1656. data/tests/illformed/sanitize/item_content_encoded_ondblclick.xml +11 -0
  1657. data/tests/illformed/sanitize/item_content_encoded_onerror.xml +11 -0
  1658. data/tests/illformed/sanitize/item_content_encoded_onfocus.xml +11 -0
  1659. data/tests/illformed/sanitize/item_content_encoded_onkeydown.xml +11 -0
  1660. data/tests/illformed/sanitize/item_content_encoded_onkeypress.xml +11 -0
  1661. data/tests/illformed/sanitize/item_content_encoded_onkeyup.xml +11 -0
  1662. data/tests/illformed/sanitize/item_content_encoded_onload.xml +11 -0
  1663. data/tests/illformed/sanitize/item_content_encoded_onmousedown.xml +11 -0
  1664. data/tests/illformed/sanitize/item_content_encoded_onmouseout.xml +11 -0
  1665. data/tests/illformed/sanitize/item_content_encoded_onmouseover.xml +11 -0
  1666. data/tests/illformed/sanitize/item_content_encoded_onmouseup.xml +11 -0
  1667. data/tests/illformed/sanitize/item_content_encoded_onreset.xml +11 -0
  1668. data/tests/illformed/sanitize/item_content_encoded_onresize.xml +11 -0
  1669. data/tests/illformed/sanitize/item_content_encoded_onsubmit.xml +11 -0
  1670. data/tests/illformed/sanitize/item_content_encoded_onunload.xml +11 -0
  1671. data/tests/illformed/sanitize/item_content_encoded_script.xml +11 -0
  1672. data/tests/illformed/sanitize/item_content_encoded_script_cdata.xml +11 -0
  1673. data/tests/illformed/sanitize/item_content_encoded_script_map_content.xml +11 -0
  1674. data/tests/illformed/sanitize/item_content_encoded_style.xml +11 -0
  1675. data/tests/illformed/sanitize/item_description_applet.xml +11 -0
  1676. data/tests/illformed/sanitize/item_description_blink.xml +11 -0
  1677. data/tests/illformed/sanitize/item_description_crazy.xml +81 -0
  1678. data/tests/illformed/sanitize/item_description_embed.xml +11 -0
  1679. data/tests/illformed/sanitize/item_description_frame.xml +11 -0
  1680. data/tests/illformed/sanitize/item_description_iframe.xml +11 -0
  1681. data/tests/illformed/sanitize/item_description_link.xml +11 -0
  1682. data/tests/illformed/sanitize/item_description_meta.xml +11 -0
  1683. data/tests/illformed/sanitize/item_description_object.xml +11 -0
  1684. data/tests/illformed/sanitize/item_description_onabort.xml +11 -0
  1685. data/tests/illformed/sanitize/item_description_onblur.xml +11 -0
  1686. data/tests/illformed/sanitize/item_description_onchange.xml +11 -0
  1687. data/tests/illformed/sanitize/item_description_onclick.xml +11 -0
  1688. data/tests/illformed/sanitize/item_description_ondblclick.xml +11 -0
  1689. data/tests/illformed/sanitize/item_description_onerror.xml +11 -0
  1690. data/tests/illformed/sanitize/item_description_onfocus.xml +11 -0
  1691. data/tests/illformed/sanitize/item_description_onkeydown.xml +11 -0
  1692. data/tests/illformed/sanitize/item_description_onkeypress.xml +11 -0
  1693. data/tests/illformed/sanitize/item_description_onkeyup.xml +11 -0
  1694. data/tests/illformed/sanitize/item_description_onload.xml +11 -0
  1695. data/tests/illformed/sanitize/item_description_onmousedown.xml +11 -0
  1696. data/tests/illformed/sanitize/item_description_onmouseout.xml +11 -0
  1697. data/tests/illformed/sanitize/item_description_onmouseover.xml +11 -0
  1698. data/tests/illformed/sanitize/item_description_onmouseup.xml +11 -0
  1699. data/tests/illformed/sanitize/item_description_onreset.xml +11 -0
  1700. data/tests/illformed/sanitize/item_description_onresize.xml +11 -0
  1701. data/tests/illformed/sanitize/item_description_onsubmit.xml +11 -0
  1702. data/tests/illformed/sanitize/item_description_onunload.xml +11 -0
  1703. data/tests/illformed/sanitize/item_description_script.xml +11 -0
  1704. data/tests/illformed/sanitize/item_description_script_cdata.xml +11 -0
  1705. data/tests/illformed/sanitize/item_description_script_map_summary.xml +11 -0
  1706. data/tests/illformed/sanitize/item_description_style.xml +11 -0
  1707. data/tests/illformed/sanitize/item_fullitem_applet.xml +11 -0
  1708. data/tests/illformed/sanitize/item_fullitem_blink.xml +11 -0
  1709. data/tests/illformed/sanitize/item_fullitem_crazy.xml +77 -0
  1710. data/tests/illformed/sanitize/item_fullitem_embed.xml +11 -0
  1711. data/tests/illformed/sanitize/item_fullitem_frame.xml +11 -0
  1712. data/tests/illformed/sanitize/item_fullitem_iframe.xml +11 -0
  1713. data/tests/illformed/sanitize/item_fullitem_link.xml +11 -0
  1714. data/tests/illformed/sanitize/item_fullitem_meta.xml +11 -0
  1715. data/tests/illformed/sanitize/item_fullitem_object.xml +11 -0
  1716. data/tests/illformed/sanitize/item_fullitem_onabort.xml +11 -0
  1717. data/tests/illformed/sanitize/item_fullitem_onblur.xml +11 -0
  1718. data/tests/illformed/sanitize/item_fullitem_onchange.xml +11 -0
  1719. data/tests/illformed/sanitize/item_fullitem_onclick.xml +11 -0
  1720. data/tests/illformed/sanitize/item_fullitem_ondblclick.xml +11 -0
  1721. data/tests/illformed/sanitize/item_fullitem_onerror.xml +11 -0
  1722. data/tests/illformed/sanitize/item_fullitem_onfocus.xml +11 -0
  1723. data/tests/illformed/sanitize/item_fullitem_onkeydown.xml +11 -0
  1724. data/tests/illformed/sanitize/item_fullitem_onkeypress.xml +11 -0
  1725. data/tests/illformed/sanitize/item_fullitem_onkeyup.xml +11 -0
  1726. data/tests/illformed/sanitize/item_fullitem_onload.xml +11 -0
  1727. data/tests/illformed/sanitize/item_fullitem_onmousedown.xml +11 -0
  1728. data/tests/illformed/sanitize/item_fullitem_onmouseout.xml +11 -0
  1729. data/tests/illformed/sanitize/item_fullitem_onmouseover.xml +11 -0
  1730. data/tests/illformed/sanitize/item_fullitem_onmouseup.xml +11 -0
  1731. data/tests/illformed/sanitize/item_fullitem_onreset.xml +11 -0
  1732. data/tests/illformed/sanitize/item_fullitem_onresize.xml +11 -0
  1733. data/tests/illformed/sanitize/item_fullitem_onsubmit.xml +11 -0
  1734. data/tests/illformed/sanitize/item_fullitem_onunload.xml +11 -0
  1735. data/tests/illformed/sanitize/item_fullitem_script.xml +11 -0
  1736. data/tests/illformed/sanitize/item_fullitem_script_cdata.xml +11 -0
  1737. data/tests/illformed/sanitize/item_fullitem_script_map_summary.xml +11 -0
  1738. data/tests/illformed/sanitize/item_fullitem_style.xml +11 -0
  1739. data/tests/illformed/sanitize/item_xhtml_body_applet.xml +11 -0
  1740. data/tests/illformed/sanitize/item_xhtml_body_blink.xml +11 -0
  1741. data/tests/illformed/sanitize/item_xhtml_body_embed.xml +11 -0
  1742. data/tests/illformed/sanitize/item_xhtml_body_frame.xml +11 -0
  1743. data/tests/illformed/sanitize/item_xhtml_body_iframe.xml +11 -0
  1744. data/tests/illformed/sanitize/item_xhtml_body_link.xml +11 -0
  1745. data/tests/illformed/sanitize/item_xhtml_body_meta.xml +11 -0
  1746. data/tests/illformed/sanitize/item_xhtml_body_object.xml +11 -0
  1747. data/tests/illformed/sanitize/item_xhtml_body_onabort.xml +11 -0
  1748. data/tests/illformed/sanitize/item_xhtml_body_onblur.xml +11 -0
  1749. data/tests/illformed/sanitize/item_xhtml_body_onchange.xml +11 -0
  1750. data/tests/illformed/sanitize/item_xhtml_body_onclick.xml +11 -0
  1751. data/tests/illformed/sanitize/item_xhtml_body_ondblclick.xml +11 -0
  1752. data/tests/illformed/sanitize/item_xhtml_body_onerror.xml +11 -0
  1753. data/tests/illformed/sanitize/item_xhtml_body_onfocus.xml +11 -0
  1754. data/tests/illformed/sanitize/item_xhtml_body_onkeydown.xml +11 -0
  1755. data/tests/illformed/sanitize/item_xhtml_body_onkeypress.xml +11 -0
  1756. data/tests/illformed/sanitize/item_xhtml_body_onkeyup.xml +11 -0
  1757. data/tests/illformed/sanitize/item_xhtml_body_onload.xml +11 -0
  1758. data/tests/illformed/sanitize/item_xhtml_body_onmousedown.xml +11 -0
  1759. data/tests/illformed/sanitize/item_xhtml_body_onmouseout.xml +11 -0
  1760. data/tests/illformed/sanitize/item_xhtml_body_onmouseover.xml +11 -0
  1761. data/tests/illformed/sanitize/item_xhtml_body_onmouseup.xml +11 -0
  1762. data/tests/illformed/sanitize/item_xhtml_body_onreset.xml +11 -0
  1763. data/tests/illformed/sanitize/item_xhtml_body_onresize.xml +11 -0
  1764. data/tests/illformed/sanitize/item_xhtml_body_onsubmit.xml +11 -0
  1765. data/tests/illformed/sanitize/item_xhtml_body_onunload.xml +11 -0
  1766. data/tests/illformed/sanitize/item_xhtml_body_script.xml +11 -0
  1767. data/tests/illformed/sanitize/item_xhtml_body_script_map_content.xml +11 -0
  1768. data/tests/illformed/sanitize/item_xhtml_body_style.xml +11 -0
  1769. data/tests/wellformed/amp/amp01.xml +9 -0
  1770. data/tests/wellformed/amp/amp02.xml +9 -0
  1771. data/tests/wellformed/amp/amp03.xml +9 -0
  1772. data/tests/wellformed/amp/amp04.xml +9 -0
  1773. data/tests/wellformed/amp/amp05.xml +9 -0
  1774. data/tests/wellformed/amp/amp06.xml +9 -0
  1775. data/tests/wellformed/amp/amp07.xml +9 -0
  1776. data/tests/wellformed/amp/amp08.xml +9 -0
  1777. data/tests/wellformed/amp/amp09.xml +9 -0
  1778. data/tests/wellformed/amp/amp10.xml +9 -0
  1779. data/tests/wellformed/amp/amp11.xml +9 -0
  1780. data/tests/wellformed/amp/amp12.xml +9 -0
  1781. data/tests/wellformed/amp/amp13.xml +9 -0
  1782. data/tests/wellformed/amp/amp14.xml +9 -0
  1783. data/tests/wellformed/amp/amp15.xml +9 -0
  1784. data/tests/wellformed/amp/amp16.xml +9 -0
  1785. data/tests/wellformed/amp/amp17.xml +9 -0
  1786. data/tests/wellformed/amp/amp18.xml +9 -0
  1787. data/tests/wellformed/amp/amp19.xml +9 -0
  1788. data/tests/wellformed/amp/amp20.xml +9 -0
  1789. data/tests/wellformed/amp/amp21.xml +9 -0
  1790. data/tests/wellformed/amp/amp22.xml +9 -0
  1791. data/tests/wellformed/amp/amp23.xml +9 -0
  1792. data/tests/wellformed/amp/amp24.xml +9 -0
  1793. data/tests/wellformed/amp/amp25.xml +9 -0
  1794. data/tests/wellformed/amp/amp26.xml +9 -0
  1795. data/tests/wellformed/amp/amp27.xml +9 -0
  1796. data/tests/wellformed/amp/amp28.xml +9 -0
  1797. data/tests/wellformed/amp/amp29.xml +9 -0
  1798. data/tests/wellformed/amp/amp30.xml +9 -0
  1799. data/tests/wellformed/amp/amp31.xml +9 -0
  1800. data/tests/wellformed/amp/amp32.xml +9 -0
  1801. data/tests/wellformed/amp/amp33.xml +9 -0
  1802. data/tests/wellformed/amp/amp34.xml +9 -0
  1803. data/tests/wellformed/amp/amp35.xml +9 -0
  1804. data/tests/wellformed/amp/amp36.xml +9 -0
  1805. data/tests/wellformed/amp/amp37.xml +9 -0
  1806. data/tests/wellformed/amp/amp38.xml +9 -0
  1807. data/tests/wellformed/amp/amp39.xml +9 -0
  1808. data/tests/wellformed/amp/amp40.xml +9 -0
  1809. data/tests/wellformed/amp/amp41.xml +9 -0
  1810. data/tests/wellformed/amp/amp42.xml +9 -0
  1811. data/tests/wellformed/amp/amp43.xml +9 -0
  1812. data/tests/wellformed/amp/amp44.xml +9 -0
  1813. data/tests/wellformed/amp/amp45.xml +9 -0
  1814. data/tests/wellformed/amp/amp46.xml +9 -0
  1815. data/tests/wellformed/amp/amp47.xml +9 -0
  1816. data/tests/wellformed/amp/amp48.xml +9 -0
  1817. data/tests/wellformed/amp/amp49.xml +9 -0
  1818. data/tests/wellformed/amp/amp50.xml +9 -0
  1819. data/tests/wellformed/amp/amp51.xml +9 -0
  1820. data/tests/wellformed/amp/amp52.xml +9 -0
  1821. data/tests/wellformed/amp/amp53.xml +9 -0
  1822. data/tests/wellformed/amp/amp54.xml +9 -0
  1823. data/tests/wellformed/amp/amp55.xml +9 -0
  1824. data/tests/wellformed/amp/amp56.xml +9 -0
  1825. data/tests/wellformed/amp/amp57.xml +9 -0
  1826. data/tests/wellformed/amp/amp58.xml +9 -0
  1827. data/tests/wellformed/amp/amp59.xml +9 -0
  1828. data/tests/wellformed/amp/amp60.xml +9 -0
  1829. data/tests/wellformed/amp/amp61.xml +9 -0
  1830. data/tests/wellformed/amp/amp62.xml +9 -0
  1831. data/tests/wellformed/amp/amp63.xml +9 -0
  1832. data/tests/wellformed/amp/amp64.xml +9 -0
  1833. data/tests/wellformed/atom/atom_namespace_1.xml +7 -0
  1834. data/tests/wellformed/atom/atom_namespace_2.xml +7 -0
  1835. data/tests/wellformed/atom/atom_namespace_3.xml +7 -0
  1836. data/tests/wellformed/atom/atom_namespace_4.xml +7 -0
  1837. data/tests/wellformed/atom/atom_namespace_5.xml +7 -0
  1838. data/tests/wellformed/atom/entry_author_email.xml +13 -0
  1839. data/tests/wellformed/atom/entry_author_homepage.xml +13 -0
  1840. data/tests/wellformed/atom/entry_author_map_author.xml +13 -0
  1841. data/tests/wellformed/atom/entry_author_map_author_2.xml +12 -0
  1842. data/tests/wellformed/atom/entry_author_name.xml +13 -0
  1843. data/tests/wellformed/atom/entry_author_uri.xml +13 -0
  1844. data/tests/wellformed/atom/entry_author_url.xml +13 -0
  1845. data/tests/wellformed/atom/entry_content_mode_base64.xml +11 -0
  1846. data/tests/wellformed/atom/entry_content_mode_escaped.xml +9 -0
  1847. data/tests/wellformed/atom/entry_content_type.xml +9 -0
  1848. data/tests/wellformed/atom/entry_content_type_text_plain.xml +9 -0
  1849. data/tests/wellformed/atom/entry_content_value.xml +9 -0
  1850. data/tests/wellformed/atom/entry_contributor_email.xml +13 -0
  1851. data/tests/wellformed/atom/entry_contributor_homepage.xml +13 -0
  1852. data/tests/wellformed/atom/entry_contributor_multiple.xml +18 -0
  1853. data/tests/wellformed/atom/entry_contributor_name.xml +13 -0
  1854. data/tests/wellformed/atom/entry_contributor_uri.xml +13 -0
  1855. data/tests/wellformed/atom/entry_contributor_url.xml +13 -0
  1856. data/tests/wellformed/atom/entry_id.xml +9 -0
  1857. data/tests/wellformed/atom/entry_id_map_guid.xml +9 -0
  1858. data/tests/wellformed/atom/entry_link_alternate_map_link.xml +9 -0
  1859. data/tests/wellformed/atom/entry_link_alternate_map_link_2.xml +9 -0
  1860. data/tests/wellformed/atom/entry_link_href.xml +9 -0
  1861. data/tests/wellformed/atom/entry_link_multiple.xml +10 -0
  1862. data/tests/wellformed/atom/entry_link_rel.xml +9 -0
  1863. data/tests/wellformed/atom/entry_link_title.xml +9 -0
  1864. data/tests/wellformed/atom/entry_link_type.xml +9 -0
  1865. data/tests/wellformed/atom/entry_summary.xml +9 -0
  1866. data/tests/wellformed/atom/entry_summary_base64.xml +11 -0
  1867. data/tests/wellformed/atom/entry_summary_base64_2.xml +11 -0
  1868. data/tests/wellformed/atom/entry_summary_content_mode_base64.xml +11 -0
  1869. data/tests/wellformed/atom/entry_summary_content_mode_escaped.xml +9 -0
  1870. data/tests/wellformed/atom/entry_summary_content_type.xml +9 -0
  1871. data/tests/wellformed/atom/entry_summary_content_type_text_plain.xml +9 -0
  1872. data/tests/wellformed/atom/entry_summary_content_value.xml +9 -0
  1873. data/tests/wellformed/atom/entry_summary_escaped_markup.xml +9 -0
  1874. data/tests/wellformed/atom/entry_summary_inline_markup.xml +9 -0
  1875. data/tests/wellformed/atom/entry_summary_inline_markup_2.xml +9 -0
  1876. data/tests/wellformed/atom/entry_summary_naked_markup.xml +9 -0
  1877. data/tests/wellformed/atom/entry_summary_text_plain.xml +9 -0
  1878. data/tests/wellformed/atom/entry_title.xml +9 -0
  1879. data/tests/wellformed/atom/entry_title_base64.xml +11 -0
  1880. data/tests/wellformed/atom/entry_title_base64_2.xml +11 -0
  1881. data/tests/wellformed/atom/entry_title_content_mode_base64.xml +11 -0
  1882. data/tests/wellformed/atom/entry_title_content_mode_escaped.xml +9 -0
  1883. data/tests/wellformed/atom/entry_title_content_type.xml +9 -0
  1884. data/tests/wellformed/atom/entry_title_content_type_text_plain.xml +9 -0
  1885. data/tests/wellformed/atom/entry_title_content_value.xml +9 -0
  1886. data/tests/wellformed/atom/entry_title_escaped_markup.xml +9 -0
  1887. data/tests/wellformed/atom/entry_title_inline_markup.xml +9 -0
  1888. data/tests/wellformed/atom/entry_title_inline_markup_2.xml +9 -0
  1889. data/tests/wellformed/atom/entry_title_naked_markup.xml +9 -0
  1890. data/tests/wellformed/atom/entry_title_text_plain.xml +9 -0
  1891. data/tests/wellformed/atom/entry_title_text_plain_brackets.xml +9 -0
  1892. data/tests/wellformed/atom/feed_author_email.xml +11 -0
  1893. data/tests/wellformed/atom/feed_author_homepage.xml +11 -0
  1894. data/tests/wellformed/atom/feed_author_map_author.xml +11 -0
  1895. data/tests/wellformed/atom/feed_author_map_author_2.xml +10 -0
  1896. data/tests/wellformed/atom/feed_author_name.xml +11 -0
  1897. data/tests/wellformed/atom/feed_author_uri.xml +11 -0
  1898. data/tests/wellformed/atom/feed_author_url.xml +11 -0
  1899. data/tests/wellformed/atom/feed_contributor_email.xml +11 -0
  1900. data/tests/wellformed/atom/feed_contributor_homepage.xml +11 -0
  1901. data/tests/wellformed/atom/feed_contributor_multiple.xml +16 -0
  1902. data/tests/wellformed/atom/feed_contributor_name.xml +11 -0
  1903. data/tests/wellformed/atom/feed_contributor_uri.xml +11 -0
  1904. data/tests/wellformed/atom/feed_contributor_url.xml +11 -0
  1905. data/tests/wellformed/atom/feed_copyright.xml +7 -0
  1906. data/tests/wellformed/atom/feed_copyright_base64.xml +9 -0
  1907. data/tests/wellformed/atom/feed_copyright_base64_2.xml +9 -0
  1908. data/tests/wellformed/atom/feed_copyright_content_mode_base64.xml +9 -0
  1909. data/tests/wellformed/atom/feed_copyright_content_mode_escaped.xml +7 -0
  1910. data/tests/wellformed/atom/feed_copyright_content_type.xml +7 -0
  1911. data/tests/wellformed/atom/feed_copyright_content_type_text_plain.xml +7 -0
  1912. data/tests/wellformed/atom/feed_copyright_content_value.xml +7 -0
  1913. data/tests/wellformed/atom/feed_copyright_escaped_markup.xml +7 -0
  1914. data/tests/wellformed/atom/feed_copyright_inline_markup.xml +7 -0
  1915. data/tests/wellformed/atom/feed_copyright_inline_markup_2.xml +7 -0
  1916. data/tests/wellformed/atom/feed_copyright_naked_markup.xml +7 -0
  1917. data/tests/wellformed/atom/feed_copyright_text_plain.xml +7 -0
  1918. data/tests/wellformed/atom/feed_generator.xml +7 -0
  1919. data/tests/wellformed/atom/feed_generator_name.xml +7 -0
  1920. data/tests/wellformed/atom/feed_generator_url.xml +7 -0
  1921. data/tests/wellformed/atom/feed_generator_version.xml +7 -0
  1922. data/tests/wellformed/atom/feed_id.xml +7 -0
  1923. data/tests/wellformed/atom/feed_id_map_guid.xml +7 -0
  1924. data/tests/wellformed/atom/feed_info.xml +7 -0
  1925. data/tests/wellformed/atom/feed_info_base64.xml +9 -0
  1926. data/tests/wellformed/atom/feed_info_base64_2.xml +9 -0
  1927. data/tests/wellformed/atom/feed_info_content_mode_base64.xml +9 -0
  1928. data/tests/wellformed/atom/feed_info_content_mode_escaped.xml +7 -0
  1929. data/tests/wellformed/atom/feed_info_content_type.xml +7 -0
  1930. data/tests/wellformed/atom/feed_info_content_type_text_plain.xml +7 -0
  1931. data/tests/wellformed/atom/feed_info_content_value.xml +7 -0
  1932. data/tests/wellformed/atom/feed_info_escaped_markup.xml +7 -0
  1933. data/tests/wellformed/atom/feed_info_inline_markup.xml +7 -0
  1934. data/tests/wellformed/atom/feed_info_inline_markup_2.xml +7 -0
  1935. data/tests/wellformed/atom/feed_info_naked_markup.xml +7 -0
  1936. data/tests/wellformed/atom/feed_info_text_plain.xml +7 -0
  1937. data/tests/wellformed/atom/feed_link_alternate_map_link.xml +7 -0
  1938. data/tests/wellformed/atom/feed_link_alternate_map_link_2.xml +7 -0
  1939. data/tests/wellformed/atom/feed_link_href.xml +7 -0
  1940. data/tests/wellformed/atom/feed_link_multiple.xml +8 -0
  1941. data/tests/wellformed/atom/feed_link_rel.xml +7 -0
  1942. data/tests/wellformed/atom/feed_link_title.xml +7 -0
  1943. data/tests/wellformed/atom/feed_link_type.xml +7 -0
  1944. data/tests/wellformed/atom/feed_tagline.xml +7 -0
  1945. data/tests/wellformed/atom/feed_tagline_base64.xml +9 -0
  1946. data/tests/wellformed/atom/feed_tagline_base64_2.xml +9 -0
  1947. data/tests/wellformed/atom/feed_tagline_content_mode_base64.xml +9 -0
  1948. data/tests/wellformed/atom/feed_tagline_content_mode_escaped.xml +7 -0
  1949. data/tests/wellformed/atom/feed_tagline_content_type.xml +7 -0
  1950. data/tests/wellformed/atom/feed_tagline_content_type_text_plain.xml +7 -0
  1951. data/tests/wellformed/atom/feed_tagline_content_value.xml +7 -0
  1952. data/tests/wellformed/atom/feed_tagline_escaped_markup.xml +7 -0
  1953. data/tests/wellformed/atom/feed_tagline_inline_markup.xml +7 -0
  1954. data/tests/wellformed/atom/feed_tagline_inline_markup_2.xml +7 -0
  1955. data/tests/wellformed/atom/feed_tagline_naked_markup.xml +7 -0
  1956. data/tests/wellformed/atom/feed_tagline_text_plain.xml +7 -0
  1957. data/tests/wellformed/atom/feed_title.xml +7 -0
  1958. data/tests/wellformed/atom/feed_title_base64.xml +9 -0
  1959. data/tests/wellformed/atom/feed_title_base64_2.xml +9 -0
  1960. data/tests/wellformed/atom/feed_title_content_mode_base64.xml +9 -0
  1961. data/tests/wellformed/atom/feed_title_content_mode_escaped.xml +7 -0
  1962. data/tests/wellformed/atom/feed_title_content_type.xml +7 -0
  1963. data/tests/wellformed/atom/feed_title_content_type_text_plain.xml +7 -0
  1964. data/tests/wellformed/atom/feed_title_content_value.xml +7 -0
  1965. data/tests/wellformed/atom/feed_title_escaped_markup.xml +7 -0
  1966. data/tests/wellformed/atom/feed_title_inline_markup.xml +7 -0
  1967. data/tests/wellformed/atom/feed_title_inline_markup_2.xml +7 -0
  1968. data/tests/wellformed/atom/feed_title_naked_markup.xml +7 -0
  1969. data/tests/wellformed/atom/feed_title_text_plain.xml +7 -0
  1970. data/tests/wellformed/atom/relative_uri.xml +7 -0
  1971. data/tests/wellformed/atom/relative_uri_inherit.xml +7 -0
  1972. data/tests/wellformed/atom/relative_uri_inherit_2.xml +7 -0
  1973. data/tests/wellformed/atom10/atom10_namespace.xml +7 -0
  1974. data/tests/wellformed/atom10/atom10_version.xml +6 -0
  1975. data/tests/wellformed/atom10/entry_author_email.xml +13 -0
  1976. data/tests/wellformed/atom10/entry_author_map_author.xml +13 -0
  1977. data/tests/wellformed/atom10/entry_author_map_author_2.xml +12 -0
  1978. data/tests/wellformed/atom10/entry_author_name.xml +13 -0
  1979. data/tests/wellformed/atom10/entry_author_uri.xml +13 -0
  1980. data/tests/wellformed/atom10/entry_author_url.xml +13 -0
  1981. data/tests/wellformed/atom10/entry_category_label.xml +9 -0
  1982. data/tests/wellformed/atom10/entry_category_scheme.xml +9 -0
  1983. data/tests/wellformed/atom10/entry_category_term.xml +9 -0
  1984. data/tests/wellformed/atom10/entry_content_application_xml.xml +9 -0
  1985. data/tests/wellformed/atom10/entry_content_base64.xml +11 -0
  1986. data/tests/wellformed/atom10/entry_content_base64_2.xml +11 -0
  1987. data/tests/wellformed/atom10/entry_content_escaped_markup.xml +9 -0
  1988. data/tests/wellformed/atom10/entry_content_inline_markup.xml +9 -0
  1989. data/tests/wellformed/atom10/entry_content_inline_markup_2.xml +9 -0
  1990. data/tests/wellformed/atom10/entry_content_src.xml +9 -0
  1991. data/tests/wellformed/atom10/entry_content_text_plain.xml +9 -0
  1992. data/tests/wellformed/atom10/entry_content_text_plain_brackets.xml +9 -0
  1993. data/tests/wellformed/atom10/entry_content_type.xml +9 -0
  1994. data/tests/wellformed/atom10/entry_content_type_text.xml +9 -0
  1995. data/tests/wellformed/atom10/entry_content_value.xml +9 -0
  1996. data/tests/wellformed/atom10/entry_contributor_email.xml +13 -0
  1997. data/tests/wellformed/atom10/entry_contributor_multiple.xml +18 -0
  1998. data/tests/wellformed/atom10/entry_contributor_name.xml +13 -0
  1999. data/tests/wellformed/atom10/entry_contributor_uri.xml +13 -0
  2000. data/tests/wellformed/atom10/entry_contributor_url.xml +13 -0
  2001. data/tests/wellformed/atom10/entry_id.xml +9 -0
  2002. data/tests/wellformed/atom10/entry_id_map_guid.xml +9 -0
  2003. data/tests/wellformed/atom10/entry_id_no_normalization_1.xml +9 -0
  2004. data/tests/wellformed/atom10/entry_id_no_normalization_2.xml +9 -0
  2005. data/tests/wellformed/atom10/entry_id_no_normalization_3.xml +9 -0
  2006. data/tests/wellformed/atom10/entry_id_no_normalization_4.xml +9 -0
  2007. data/tests/wellformed/atom10/entry_id_no_normalization_5.xml +9 -0
  2008. data/tests/wellformed/atom10/entry_id_no_normalization_6.xml +9 -0
  2009. data/tests/wellformed/atom10/entry_id_no_normalization_7.xml +9 -0
  2010. data/tests/wellformed/atom10/entry_link_alternate_map_link.xml +9 -0
  2011. data/tests/wellformed/atom10/entry_link_alternate_map_link_2.xml +9 -0
  2012. data/tests/wellformed/atom10/entry_link_alternate_map_link_3.xml +11 -0
  2013. data/tests/wellformed/atom10/entry_link_href.xml +9 -0
  2014. data/tests/wellformed/atom10/entry_link_hreflang.xml +9 -0
  2015. data/tests/wellformed/atom10/entry_link_length.xml +9 -0
  2016. data/tests/wellformed/atom10/entry_link_multiple.xml +10 -0
  2017. data/tests/wellformed/atom10/entry_link_no_rel.xml +9 -0
  2018. data/tests/wellformed/atom10/entry_link_rel.xml +9 -0
  2019. data/tests/wellformed/atom10/entry_link_rel_enclosure.xml +9 -0
  2020. data/tests/wellformed/atom10/entry_link_rel_enclosure_map_enclosure_length.xml +9 -0
  2021. data/tests/wellformed/atom10/entry_link_rel_enclosure_map_enclosure_type.xml +9 -0
  2022. data/tests/wellformed/atom10/entry_link_rel_enclosure_map_enclosure_url.xml +9 -0
  2023. data/tests/wellformed/atom10/entry_link_rel_other.xml +9 -0
  2024. data/tests/wellformed/atom10/entry_link_rel_related.xml +9 -0
  2025. data/tests/wellformed/atom10/entry_link_rel_self.xml +9 -0
  2026. data/tests/wellformed/atom10/entry_link_rel_via.xml +9 -0
  2027. data/tests/wellformed/atom10/entry_link_title.xml +9 -0
  2028. data/tests/wellformed/atom10/entry_link_type.xml +9 -0
  2029. data/tests/wellformed/atom10/entry_rights.xml +9 -0
  2030. data/tests/wellformed/atom10/entry_rights_content_value.xml +9 -0
  2031. data/tests/wellformed/atom10/entry_rights_escaped_markup.xml +9 -0
  2032. data/tests/wellformed/atom10/entry_rights_inline_markup.xml +9 -0
  2033. data/tests/wellformed/atom10/entry_rights_inline_markup_2.xml +9 -0
  2034. data/tests/wellformed/atom10/entry_rights_text_plain.xml +9 -0
  2035. data/tests/wellformed/atom10/entry_rights_text_plain_brackets.xml +9 -0
  2036. data/tests/wellformed/atom10/entry_rights_type_default.xml +9 -0
  2037. data/tests/wellformed/atom10/entry_rights_type_text.xml +9 -0
  2038. data/tests/wellformed/atom10/entry_source_author_email.xml +15 -0
  2039. data/tests/wellformed/atom10/entry_source_author_map_author.xml +15 -0
  2040. data/tests/wellformed/atom10/entry_source_author_map_author_2.xml +14 -0
  2041. data/tests/wellformed/atom10/entry_source_author_name.xml +15 -0
  2042. data/tests/wellformed/atom10/entry_source_author_uri.xml +15 -0
  2043. data/tests/wellformed/atom10/entry_source_category_label.xml +11 -0
  2044. data/tests/wellformed/atom10/entry_source_category_scheme.xml +11 -0
  2045. data/tests/wellformed/atom10/entry_source_category_term.xml +11 -0
  2046. data/tests/wellformed/atom10/entry_source_contributor_email.xml +15 -0
  2047. data/tests/wellformed/atom10/entry_source_contributor_multiple.xml +20 -0
  2048. data/tests/wellformed/atom10/entry_source_contributor_name.xml +15 -0
  2049. data/tests/wellformed/atom10/entry_source_contributor_uri.xml +15 -0
  2050. data/tests/wellformed/atom10/entry_source_generator.xml +11 -0
  2051. data/tests/wellformed/atom10/entry_source_generator_name.xml +11 -0
  2052. data/tests/wellformed/atom10/entry_source_generator_uri.xml +11 -0
  2053. data/tests/wellformed/atom10/entry_source_generator_version.xml +11 -0
  2054. data/tests/wellformed/atom10/entry_source_icon.xml +11 -0
  2055. data/tests/wellformed/atom10/entry_source_id.xml +11 -0
  2056. data/tests/wellformed/atom10/entry_source_link_alternate_map_link.xml +11 -0
  2057. data/tests/wellformed/atom10/entry_source_link_alternate_map_link_2.xml +11 -0
  2058. data/tests/wellformed/atom10/entry_source_link_href.xml +11 -0
  2059. data/tests/wellformed/atom10/entry_source_link_hreflang.xml +11 -0
  2060. data/tests/wellformed/atom10/entry_source_link_length.xml +11 -0
  2061. data/tests/wellformed/atom10/entry_source_link_multiple.xml +12 -0
  2062. data/tests/wellformed/atom10/entry_source_link_no_rel.xml +11 -0
  2063. data/tests/wellformed/atom10/entry_source_link_rel.xml +11 -0
  2064. data/tests/wellformed/atom10/entry_source_link_rel_other.xml +11 -0
  2065. data/tests/wellformed/atom10/entry_source_link_rel_related.xml +11 -0
  2066. data/tests/wellformed/atom10/entry_source_link_rel_self.xml +11 -0
  2067. data/tests/wellformed/atom10/entry_source_link_rel_via.xml +11 -0
  2068. data/tests/wellformed/atom10/entry_source_link_title.xml +11 -0
  2069. data/tests/wellformed/atom10/entry_source_link_type.xml +11 -0
  2070. data/tests/wellformed/atom10/entry_source_logo.xml +11 -0
  2071. data/tests/wellformed/atom10/entry_source_rights.xml +11 -0
  2072. data/tests/wellformed/atom10/entry_source_rights_base64.xml +13 -0
  2073. data/tests/wellformed/atom10/entry_source_rights_base64_2.xml +13 -0
  2074. data/tests/wellformed/atom10/entry_source_rights_content_type.xml +11 -0
  2075. data/tests/wellformed/atom10/entry_source_rights_content_type_text.xml +11 -0
  2076. data/tests/wellformed/atom10/entry_source_rights_content_value.xml +11 -0
  2077. data/tests/wellformed/atom10/entry_source_rights_escaped_markup.xml +11 -0
  2078. data/tests/wellformed/atom10/entry_source_rights_inline_markup.xml +11 -0
  2079. data/tests/wellformed/atom10/entry_source_rights_inline_markup_2.xml +11 -0
  2080. data/tests/wellformed/atom10/entry_source_rights_text_plain.xml +11 -0
  2081. data/tests/wellformed/atom10/entry_source_subittle_content_type_text.xml +11 -0
  2082. data/tests/wellformed/atom10/entry_source_subtitle.xml +11 -0
  2083. data/tests/wellformed/atom10/entry_source_subtitle_base64.xml +13 -0
  2084. data/tests/wellformed/atom10/entry_source_subtitle_base64_2.xml +13 -0
  2085. data/tests/wellformed/atom10/entry_source_subtitle_content_type.xml +11 -0
  2086. data/tests/wellformed/atom10/entry_source_subtitle_content_value.xml +11 -0
  2087. data/tests/wellformed/atom10/entry_source_subtitle_escaped_markup.xml +11 -0
  2088. data/tests/wellformed/atom10/entry_source_subtitle_inline_markup.xml +11 -0
  2089. data/tests/wellformed/atom10/entry_source_subtitle_inline_markup_2.xml +11 -0
  2090. data/tests/wellformed/atom10/entry_source_subtitle_text_plain.xml +11 -0
  2091. data/tests/wellformed/atom10/entry_source_title.xml +11 -0
  2092. data/tests/wellformed/atom10/entry_source_title_base64.xml +13 -0
  2093. data/tests/wellformed/atom10/entry_source_title_base64_2.xml +13 -0
  2094. data/tests/wellformed/atom10/entry_source_title_content_type.xml +11 -0
  2095. data/tests/wellformed/atom10/entry_source_title_content_type_text.xml +11 -0
  2096. data/tests/wellformed/atom10/entry_source_title_content_value.xml +11 -0
  2097. data/tests/wellformed/atom10/entry_source_title_escaped_markup.xml +11 -0
  2098. data/tests/wellformed/atom10/entry_source_title_inline_markup.xml +11 -0
  2099. data/tests/wellformed/atom10/entry_source_title_inline_markup_2.xml +11 -0
  2100. data/tests/wellformed/atom10/entry_source_title_text_plain.xml +11 -0
  2101. data/tests/wellformed/atom10/entry_summary.xml +9 -0
  2102. data/tests/wellformed/atom10/entry_summary_base64.xml +11 -0
  2103. data/tests/wellformed/atom10/entry_summary_base64_2.xml +11 -0
  2104. data/tests/wellformed/atom10/entry_summary_content_value.xml +9 -0
  2105. data/tests/wellformed/atom10/entry_summary_escaped_markup.xml +9 -0
  2106. data/tests/wellformed/atom10/entry_summary_inline_markup.xml +9 -0
  2107. data/tests/wellformed/atom10/entry_summary_inline_markup_2.xml +9 -0
  2108. data/tests/wellformed/atom10/entry_summary_text_plain.xml +9 -0
  2109. data/tests/wellformed/atom10/entry_summary_type_default.xml +9 -0
  2110. data/tests/wellformed/atom10/entry_summary_type_text.xml +9 -0
  2111. data/tests/wellformed/atom10/entry_title.xml +9 -0
  2112. data/tests/wellformed/atom10/entry_title_base64.xml +11 -0
  2113. data/tests/wellformed/atom10/entry_title_base64_2.xml +11 -0
  2114. data/tests/wellformed/atom10/entry_title_content_value.xml +9 -0
  2115. data/tests/wellformed/atom10/entry_title_escaped_markup.xml +9 -0
  2116. data/tests/wellformed/atom10/entry_title_inline_markup.xml +9 -0
  2117. data/tests/wellformed/atom10/entry_title_inline_markup_2.xml +9 -0
  2118. data/tests/wellformed/atom10/entry_title_text_plain.xml +9 -0
  2119. data/tests/wellformed/atom10/entry_title_text_plain_brackets.xml +9 -0
  2120. data/tests/wellformed/atom10/entry_title_type_default.xml +9 -0
  2121. data/tests/wellformed/atom10/entry_title_type_text.xml +9 -0
  2122. data/tests/wellformed/atom10/feed_author_email.xml +11 -0
  2123. data/tests/wellformed/atom10/feed_author_map_author.xml +11 -0
  2124. data/tests/wellformed/atom10/feed_author_map_author_2.xml +10 -0
  2125. data/tests/wellformed/atom10/feed_author_name.xml +11 -0
  2126. data/tests/wellformed/atom10/feed_author_uri.xml +11 -0
  2127. data/tests/wellformed/atom10/feed_author_url.xml +11 -0
  2128. data/tests/wellformed/atom10/feed_contributor_email.xml +11 -0
  2129. data/tests/wellformed/atom10/feed_contributor_multiple.xml +16 -0
  2130. data/tests/wellformed/atom10/feed_contributor_name.xml +11 -0
  2131. data/tests/wellformed/atom10/feed_contributor_uri.xml +11 -0
  2132. data/tests/wellformed/atom10/feed_contributor_url.xml +11 -0
  2133. data/tests/wellformed/atom10/feed_generator.xml +7 -0
  2134. data/tests/wellformed/atom10/feed_generator_name.xml +7 -0
  2135. data/tests/wellformed/atom10/feed_generator_url.xml +7 -0
  2136. data/tests/wellformed/atom10/feed_generator_version.xml +7 -0
  2137. data/tests/wellformed/atom10/feed_icon.xml +7 -0
  2138. data/tests/wellformed/atom10/feed_id.xml +7 -0
  2139. data/tests/wellformed/atom10/feed_id_map_guid.xml +7 -0
  2140. data/tests/wellformed/atom10/feed_link_alternate_map_link.xml +7 -0
  2141. data/tests/wellformed/atom10/feed_link_alternate_map_link_2.xml +7 -0
  2142. data/tests/wellformed/atom10/feed_link_href.xml +7 -0
  2143. data/tests/wellformed/atom10/feed_link_hreflang.xml +7 -0
  2144. data/tests/wellformed/atom10/feed_link_length.xml +7 -0
  2145. data/tests/wellformed/atom10/feed_link_multiple.xml +8 -0
  2146. data/tests/wellformed/atom10/feed_link_no_rel.xml +7 -0
  2147. data/tests/wellformed/atom10/feed_link_rel.xml +7 -0
  2148. data/tests/wellformed/atom10/feed_link_rel_other.xml +7 -0
  2149. data/tests/wellformed/atom10/feed_link_rel_related.xml +7 -0
  2150. data/tests/wellformed/atom10/feed_link_rel_self.xml +7 -0
  2151. data/tests/wellformed/atom10/feed_link_rel_via.xml +7 -0
  2152. data/tests/wellformed/atom10/feed_link_title.xml +7 -0
  2153. data/tests/wellformed/atom10/feed_link_type.xml +7 -0
  2154. data/tests/wellformed/atom10/feed_logo.xml +7 -0
  2155. data/tests/wellformed/atom10/feed_rights.xml +7 -0
  2156. data/tests/wellformed/atom10/feed_rights_base64.xml +9 -0
  2157. data/tests/wellformed/atom10/feed_rights_base64_2.xml +9 -0
  2158. data/tests/wellformed/atom10/feed_rights_content_type.xml +7 -0
  2159. data/tests/wellformed/atom10/feed_rights_content_type_text.xml +7 -0
  2160. data/tests/wellformed/atom10/feed_rights_content_value.xml +7 -0
  2161. data/tests/wellformed/atom10/feed_rights_escaped_markup.xml +7 -0
  2162. data/tests/wellformed/atom10/feed_rights_inline_markup.xml +7 -0
  2163. data/tests/wellformed/atom10/feed_rights_inline_markup_2.xml +7 -0
  2164. data/tests/wellformed/atom10/feed_rights_text_plain.xml +7 -0
  2165. data/tests/wellformed/atom10/feed_subtitle.xml +7 -0
  2166. data/tests/wellformed/atom10/feed_subtitle_base64.xml +9 -0
  2167. data/tests/wellformed/atom10/feed_subtitle_base64_2.xml +9 -0
  2168. data/tests/wellformed/atom10/feed_subtitle_content_type.xml +7 -0
  2169. data/tests/wellformed/atom10/feed_subtitle_content_type_text.xml +7 -0
  2170. data/tests/wellformed/atom10/feed_subtitle_content_value.xml +7 -0
  2171. data/tests/wellformed/atom10/feed_subtitle_escaped_markup.xml +7 -0
  2172. data/tests/wellformed/atom10/feed_subtitle_inline_markup.xml +7 -0
  2173. data/tests/wellformed/atom10/feed_subtitle_inline_markup_2.xml +7 -0
  2174. data/tests/wellformed/atom10/feed_subtitle_text_plain.xml +7 -0
  2175. data/tests/wellformed/atom10/feed_title.xml +7 -0
  2176. data/tests/wellformed/atom10/feed_title_base64.xml +9 -0
  2177. data/tests/wellformed/atom10/feed_title_base64_2.xml +9 -0
  2178. data/tests/wellformed/atom10/feed_title_content_type.xml +7 -0
  2179. data/tests/wellformed/atom10/feed_title_content_type_text.xml +7 -0
  2180. data/tests/wellformed/atom10/feed_title_content_value.xml +7 -0
  2181. data/tests/wellformed/atom10/feed_title_escaped_markup.xml +7 -0
  2182. data/tests/wellformed/atom10/feed_title_inline_markup.xml +7 -0
  2183. data/tests/wellformed/atom10/feed_title_inline_markup_2.xml +7 -0
  2184. data/tests/wellformed/atom10/feed_title_text_plain.xml +7 -0
  2185. data/tests/wellformed/atom10/relative_uri.xml +7 -0
  2186. data/tests/wellformed/atom10/relative_uri_inherit.xml +7 -0
  2187. data/tests/wellformed/atom10/relative_uri_inherit_2.xml +7 -0
  2188. data/tests/wellformed/base/cdf_item_abstract_xml_base.xml +18 -0
  2189. data/tests/wellformed/base/entry_content_xml_base.xml +9 -0
  2190. data/tests/wellformed/base/entry_content_xml_base_inherit.xml +9 -0
  2191. data/tests/wellformed/base/entry_content_xml_base_inherit_2.xml +9 -0
  2192. data/tests/wellformed/base/entry_content_xml_base_inherit_3.xml +10 -0
  2193. data/tests/wellformed/base/entry_content_xml_base_inherit_4.xml +10 -0
  2194. data/tests/wellformed/base/entry_summary_xml_base.xml +9 -0
  2195. data/tests/wellformed/base/entry_summary_xml_base_inherit.xml +9 -0
  2196. data/tests/wellformed/base/entry_summary_xml_base_inherit_2.xml +9 -0
  2197. data/tests/wellformed/base/entry_summary_xml_base_inherit_3.xml +10 -0
  2198. data/tests/wellformed/base/entry_summary_xml_base_inherit_4.xml +10 -0
  2199. data/tests/wellformed/base/entry_title_xml_base.xml +9 -0
  2200. data/tests/wellformed/base/entry_title_xml_base_inherit.xml +9 -0
  2201. data/tests/wellformed/base/entry_title_xml_base_inherit_2.xml +9 -0
  2202. data/tests/wellformed/base/entry_title_xml_base_inherit_3.xml +10 -0
  2203. data/tests/wellformed/base/entry_title_xml_base_inherit_4.xml +10 -0
  2204. data/tests/wellformed/base/feed_copyright_xml_base.xml +7 -0
  2205. data/tests/wellformed/base/feed_copyright_xml_base_inherit.xml +7 -0
  2206. data/tests/wellformed/base/feed_copyright_xml_base_inherit_2.xml +7 -0
  2207. data/tests/wellformed/base/feed_copyright_xml_base_inherit_3.xml +8 -0
  2208. data/tests/wellformed/base/feed_copyright_xml_base_inherit_4.xml +8 -0
  2209. data/tests/wellformed/base/feed_info_xml_base.xml +7 -0
  2210. data/tests/wellformed/base/feed_info_xml_base_inherit.xml +7 -0
  2211. data/tests/wellformed/base/feed_info_xml_base_inherit_2.xml +7 -0
  2212. data/tests/wellformed/base/feed_info_xml_base_inherit_3.xml +8 -0
  2213. data/tests/wellformed/base/feed_info_xml_base_inherit_4.xml +8 -0
  2214. data/tests/wellformed/base/feed_tagline_xml_base.xml +7 -0
  2215. data/tests/wellformed/base/feed_tagline_xml_base_inherit.xml +7 -0
  2216. data/tests/wellformed/base/feed_tagline_xml_base_inherit_2.xml +7 -0
  2217. data/tests/wellformed/base/feed_tagline_xml_base_inherit_3.xml +8 -0
  2218. data/tests/wellformed/base/feed_tagline_xml_base_inherit_4.xml +8 -0
  2219. data/tests/wellformed/base/feed_title_xml_base.xml +7 -0
  2220. data/tests/wellformed/base/feed_title_xml_base_inherit.xml +7 -0
  2221. data/tests/wellformed/base/feed_title_xml_base_inherit_2.xml +7 -0
  2222. data/tests/wellformed/base/feed_title_xml_base_inherit_3.xml +8 -0
  2223. data/tests/wellformed/base/feed_title_xml_base_inherit_4.xml +8 -0
  2224. data/tests/wellformed/base/http_channel_docs_base_content_location.xml +10 -0
  2225. data/tests/wellformed/base/http_channel_docs_base_docuri.xml +9 -0
  2226. data/tests/wellformed/base/http_channel_link_base_content_location.xml +10 -0
  2227. data/tests/wellformed/base/http_channel_link_base_docuri.xml +9 -0
  2228. data/tests/wellformed/base/http_entry_author_url_base_content_location.xml +12 -0
  2229. data/tests/wellformed/base/http_entry_author_url_base_docuri.xml +11 -0
  2230. data/tests/wellformed/base/http_entry_content_base64_base_content_location.xml +12 -0
  2231. data/tests/wellformed/base/http_entry_content_base64_base_docuri.xml +11 -0
  2232. data/tests/wellformed/base/http_entry_content_base_content_location.xml +10 -0
  2233. data/tests/wellformed/base/http_entry_content_base_docuri.xml +9 -0
  2234. data/tests/wellformed/base/http_entry_content_inline_base_content_location.xml +10 -0
  2235. data/tests/wellformed/base/http_entry_content_inline_base_docuri.xml +9 -0
  2236. data/tests/wellformed/base/http_entry_contributor_url_base_content_location.xml +12 -0
  2237. data/tests/wellformed/base/http_entry_contributor_url_base_docuri.xml +11 -0
  2238. data/tests/wellformed/base/http_entry_id_base_content_location.xml +10 -0
  2239. data/tests/wellformed/base/http_entry_id_base_docuri.xml +9 -0
  2240. data/tests/wellformed/base/http_entry_link_base_content_location.xml +10 -0
  2241. data/tests/wellformed/base/http_entry_link_base_docuri.xml +9 -0
  2242. data/tests/wellformed/base/http_entry_summary_base64_base_content_location.xml +12 -0
  2243. data/tests/wellformed/base/http_entry_summary_base64_base_docuri.xml +11 -0
  2244. data/tests/wellformed/base/http_entry_summary_base_content_location.xml +10 -0
  2245. data/tests/wellformed/base/http_entry_summary_base_docuri.xml +9 -0
  2246. data/tests/wellformed/base/http_entry_summary_inline_base_content_location.xml +10 -0
  2247. data/tests/wellformed/base/http_entry_summary_inline_base_docuri.xml +9 -0
  2248. data/tests/wellformed/base/http_entry_title_base64_base_content_location.xml +12 -0
  2249. data/tests/wellformed/base/http_entry_title_base64_base_docuri.xml +11 -0
  2250. data/tests/wellformed/base/http_entry_title_base_content_location.xml +10 -0
  2251. data/tests/wellformed/base/http_entry_title_base_docuri.xml +9 -0
  2252. data/tests/wellformed/base/http_entry_title_inline_base_content_location.xml +10 -0
  2253. data/tests/wellformed/base/http_entry_title_inline_base_docuri.xml +9 -0
  2254. data/tests/wellformed/base/http_feed_author_url_base_content_location.xml +10 -0
  2255. data/tests/wellformed/base/http_feed_author_url_base_docuri.xml +9 -0
  2256. data/tests/wellformed/base/http_feed_contributor_url_base_content_location.xml +10 -0
  2257. data/tests/wellformed/base/http_feed_contributor_url_base_docuri.xml +9 -0
  2258. data/tests/wellformed/base/http_feed_copyright_base64_base_content_location.xml +10 -0
  2259. data/tests/wellformed/base/http_feed_copyright_base64_base_docuri.xml +9 -0
  2260. data/tests/wellformed/base/http_feed_copyright_base_content_location.xml +8 -0
  2261. data/tests/wellformed/base/http_feed_copyright_base_docuri.xml +7 -0
  2262. data/tests/wellformed/base/http_feed_copyright_inline_base_content_location.xml +8 -0
  2263. data/tests/wellformed/base/http_feed_copyright_inline_base_docuri.xml +7 -0
  2264. data/tests/wellformed/base/http_feed_generator_url_base_content_location.xml +8 -0
  2265. data/tests/wellformed/base/http_feed_generator_url_base_docuri.xml +7 -0
  2266. data/tests/wellformed/base/http_feed_id_base_content_location.xml +8 -0
  2267. data/tests/wellformed/base/http_feed_id_base_docuri.xml +7 -0
  2268. data/tests/wellformed/base/http_feed_info_base64_base_content_location.xml +10 -0
  2269. data/tests/wellformed/base/http_feed_info_base64_base_docuri.xml +9 -0
  2270. data/tests/wellformed/base/http_feed_info_base_content_location.xml +8 -0
  2271. data/tests/wellformed/base/http_feed_info_base_docuri.xml +7 -0
  2272. data/tests/wellformed/base/http_feed_info_inline_base_content_location.xml +8 -0
  2273. data/tests/wellformed/base/http_feed_info_inline_base_docuri.xml +7 -0
  2274. data/tests/wellformed/base/http_feed_link_base_content_location.xml +8 -0
  2275. data/tests/wellformed/base/http_feed_link_base_docuri.xml +7 -0
  2276. data/tests/wellformed/base/http_feed_tagline_base64_base_content_location.xml +10 -0
  2277. data/tests/wellformed/base/http_feed_tagline_base64_base_docuri.xml +9 -0
  2278. data/tests/wellformed/base/http_feed_tagline_base_content_location.xml +8 -0
  2279. data/tests/wellformed/base/http_feed_tagline_base_docuri.xml +7 -0
  2280. data/tests/wellformed/base/http_feed_tagline_inline_base_content_location.xml +8 -0
  2281. data/tests/wellformed/base/http_feed_tagline_inline_base_docuri.xml +7 -0
  2282. data/tests/wellformed/base/http_feed_title_base64_base_content_location.xml +10 -0
  2283. data/tests/wellformed/base/http_feed_title_base64_base_docuri.xml +9 -0
  2284. data/tests/wellformed/base/http_feed_title_base_content_location.xml +8 -0
  2285. data/tests/wellformed/base/http_feed_title_base_docuri.xml +7 -0
  2286. data/tests/wellformed/base/http_feed_title_inline_base_content_location.xml +8 -0
  2287. data/tests/wellformed/base/http_feed_title_inline_base_docuri.xml +7 -0
  2288. data/tests/wellformed/base/http_item_body_base_content_location.xml +12 -0
  2289. data/tests/wellformed/base/http_item_body_base_docuri.xml +11 -0
  2290. data/tests/wellformed/base/http_item_comments_base_content_location.xml +12 -0
  2291. data/tests/wellformed/base/http_item_comments_base_docuri.xml +11 -0
  2292. data/tests/wellformed/base/http_item_content_encoded_base_content_location.xml +12 -0
  2293. data/tests/wellformed/base/http_item_content_encoded_base_docuri.xml +11 -0
  2294. data/tests/wellformed/base/http_item_description_base_content_location.xml +12 -0
  2295. data/tests/wellformed/base/http_item_description_base_docuri.xml +11 -0
  2296. data/tests/wellformed/base/http_item_fullitem_base_content_location.xml +12 -0
  2297. data/tests/wellformed/base/http_item_fullitem_base_docuri.xml +11 -0
  2298. data/tests/wellformed/base/http_item_link_base_content_location.xml +12 -0
  2299. data/tests/wellformed/base/http_item_link_base_docuri.xml +11 -0
  2300. data/tests/wellformed/base/http_item_wfw_commentRSS_base_content_location.xml +12 -0
  2301. data/tests/wellformed/base/http_item_wfw_commentRSS_base_docuri.xml +11 -0
  2302. data/tests/wellformed/base/http_item_wfw_comment_base_content_location.xml +12 -0
  2303. data/tests/wellformed/base/http_item_wfw_comment_base_docuri.xml +11 -0
  2304. data/tests/wellformed/base/http_item_xhtml_body_base_content_location.xml +12 -0
  2305. data/tests/wellformed/base/http_item_xhtml_body_base_docuri.xml +11 -0
  2306. data/tests/wellformed/base/http_relative_xml_base.xml +10 -0
  2307. data/tests/wellformed/base/http_relative_xml_base_2.xml +10 -0
  2308. data/tests/wellformed/base/malformed_base.xml +9 -0
  2309. data/tests/wellformed/base/relative_xml_base.xml +9 -0
  2310. data/tests/wellformed/base/relative_xml_base_2.xml +9 -0
  2311. data/tests/wellformed/cdf/channel_abstract_map_description.xml +7 -0
  2312. data/tests/wellformed/cdf/channel_abstract_map_tagline.xml +7 -0
  2313. data/tests/wellformed/cdf/channel_href_map_link.xml +6 -0
  2314. data/tests/wellformed/cdf/channel_href_map_links.xml +6 -0
  2315. data/tests/wellformed/cdf/channel_title.xml +7 -0
  2316. data/tests/wellformed/cdf/item_abstract_map_description.xml +9 -0
  2317. data/tests/wellformed/cdf/item_abstract_map_summary.xml +9 -0
  2318. data/tests/wellformed/cdf/item_href_map_link.xml +8 -0
  2319. data/tests/wellformed/cdf/item_href_map_links.xml +8 -0
  2320. data/tests/wellformed/cdf/item_title.xml +9 -0
  2321. data/tests/wellformed/date/cdf_channel_lastmod_map_date.xml +6 -0
  2322. data/tests/wellformed/date/cdf_channel_lastmod_map_modified.xml +6 -0
  2323. data/tests/wellformed/date/cdf_channel_lastmod_map_modified_parsed.xml +6 -0
  2324. data/tests/wellformed/date/cdf_item_lastmod_map_date.xml +8 -0
  2325. data/tests/wellformed/date/cdf_item_lastmod_map_modified.xml +8 -0
  2326. data/tests/wellformed/date/cdf_item_lastmod_map_modified_parsed.xml +8 -0
  2327. data/tests/wellformed/date/channel_dc_date.xml +9 -0
  2328. data/tests/wellformed/date/channel_dc_date_map_modified.xml +9 -0
  2329. data/tests/wellformed/date/channel_dc_date_w3dtf_utc.xml +9 -0
  2330. data/tests/wellformed/date/channel_dc_date_w3dtf_utc_map_modified_parsed.xml +9 -0
  2331. data/tests/wellformed/date/channel_dcterms_created.xml +9 -0
  2332. data/tests/wellformed/date/channel_dcterms_created_w3dtf_utc.xml +9 -0
  2333. data/tests/wellformed/date/channel_dcterms_issued.xml +9 -0
  2334. data/tests/wellformed/date/channel_dcterms_issued_w3dtf_utc.xml +9 -0
  2335. data/tests/wellformed/date/channel_dcterms_modified.xml +9 -0
  2336. data/tests/wellformed/date/channel_dcterms_modified_map_date.xml +9 -0
  2337. data/tests/wellformed/date/channel_dcterms_modified_w3dtf_utc.xml +9 -0
  2338. data/tests/wellformed/date/channel_dcterms_modified_w3dtf_utc_map_date.xml +9 -0
  2339. data/tests/wellformed/date/channel_pubDate.xml +9 -0
  2340. data/tests/wellformed/date/channel_pubDate_asctime.xml +9 -0
  2341. data/tests/wellformed/date/channel_pubDate_disney.xml +9 -0
  2342. data/tests/wellformed/date/channel_pubDate_disney_at.xml +9 -0
  2343. data/tests/wellformed/date/channel_pubDate_disney_ct.xml +9 -0
  2344. data/tests/wellformed/date/channel_pubDate_disney_mt.xml +9 -0
  2345. data/tests/wellformed/date/channel_pubDate_disney_pt.xml +9 -0
  2346. data/tests/wellformed/date/channel_pubDate_greek_1.xml +9 -0
  2347. data/tests/wellformed/date/channel_pubDate_hungarian_1.xml +9 -0
  2348. data/tests/wellformed/date/channel_pubDate_iso8601_ym.xml +9 -0
  2349. data/tests/wellformed/date/channel_pubDate_iso8601_ym_2.xml +9 -0
  2350. data/tests/wellformed/date/channel_pubDate_iso8601_ymd.xml +9 -0
  2351. data/tests/wellformed/date/channel_pubDate_iso8601_ymd_2.xml +9 -0
  2352. data/tests/wellformed/date/channel_pubDate_iso8601_yo_2.xml +9 -0
  2353. data/tests/wellformed/date/channel_pubDate_korean_nate.xml +11 -0
  2354. data/tests/wellformed/date/channel_pubDate_map_modified.xml +9 -0
  2355. data/tests/wellformed/date/channel_pubDate_mssql.xml +9 -0
  2356. data/tests/wellformed/date/channel_pubDate_mssql_nofraction.xml +9 -0
  2357. data/tests/wellformed/date/channel_pubDate_nosecond.xml +9 -0
  2358. data/tests/wellformed/date/channel_pubDate_notime.xml +9 -0
  2359. data/tests/wellformed/date/channel_pubDate_rfc2822.xml +9 -0
  2360. data/tests/wellformed/date/channel_pubDate_rfc2822_rollover_june_31.xml +9 -0
  2361. data/tests/wellformed/date/channel_pubDate_rfc822.xml +9 -0
  2362. data/tests/wellformed/date/channel_pubDate_w3dtf_rollover_25h.xml +9 -0
  2363. data/tests/wellformed/date/channel_pubDate_w3dtf_rollover_61m.xml +9 -0
  2364. data/tests/wellformed/date/channel_pubDate_w3dtf_rollover_61s.xml +9 -0
  2365. data/tests/wellformed/date/channel_pubDate_w3dtf_rollover_leapyear.xml +9 -0
  2366. data/tests/wellformed/date/channel_pubDate_w3dtf_rollover_leapyear400.xml +9 -0
  2367. data/tests/wellformed/date/channel_pubDate_w3dtf_rollover_nonleapyear.xml +9 -0
  2368. data/tests/wellformed/date/channel_pubDate_w3dtf_sf.xml +9 -0
  2369. data/tests/wellformed/date/channel_pubDate_w3dtf_tokyo.xml +9 -0
  2370. data/tests/wellformed/date/channel_pubDate_w3dtf_utc.xml +9 -0
  2371. data/tests/wellformed/date/channel_pubDate_w3dtf_y.xml +9 -0
  2372. data/tests/wellformed/date/channel_pubDate_w3dtf_ym.xml +9 -0
  2373. data/tests/wellformed/date/channel_pubDate_w3dtf_ymd.xml +9 -0
  2374. data/tests/wellformed/date/channel_pubDate_w3dtf_ymd_2.xml +9 -0
  2375. data/tests/wellformed/date/entry_created.xml +9 -0
  2376. data/tests/wellformed/date/entry_created_w3dtf_utc.xml +9 -0
  2377. data/tests/wellformed/date/entry_issued.xml +9 -0
  2378. data/tests/wellformed/date/entry_issued_w3dtf_utc.xml +9 -0
  2379. data/tests/wellformed/date/entry_modified.xml +9 -0
  2380. data/tests/wellformed/date/entry_modified_map_date.xml +9 -0
  2381. data/tests/wellformed/date/entry_modified_w3dtf_utc.xml +9 -0
  2382. data/tests/wellformed/date/entry_published_w3dtf_utc.xml +9 -0
  2383. data/tests/wellformed/date/entry_source_updated_w3dtf_utc.xml +11 -0
  2384. data/tests/wellformed/date/entry_updated_w3dtf_utc.xml +9 -0
  2385. data/tests/wellformed/date/feed_modified.xml +9 -0
  2386. data/tests/wellformed/date/feed_modified_asctime.xml +9 -0
  2387. data/tests/wellformed/date/feed_modified_disney.xml +7 -0
  2388. data/tests/wellformed/date/feed_modified_disney_at.xml +7 -0
  2389. data/tests/wellformed/date/feed_modified_disney_ct.xml +7 -0
  2390. data/tests/wellformed/date/feed_modified_disney_mt.xml +7 -0
  2391. data/tests/wellformed/date/feed_modified_disney_pt.xml +7 -0
  2392. data/tests/wellformed/date/feed_modified_iso8601_ym.xml +9 -0
  2393. data/tests/wellformed/date/feed_modified_iso8601_ym_2.xml +9 -0
  2394. data/tests/wellformed/date/feed_modified_iso8601_ymd.xml +9 -0
  2395. data/tests/wellformed/date/feed_modified_iso8601_ymd_2.xml +9 -0
  2396. data/tests/wellformed/date/feed_modified_iso8601_yo_2.xml +9 -0
  2397. data/tests/wellformed/date/feed_modified_map_date.xml +9 -0
  2398. data/tests/wellformed/date/feed_modified_rfc2822.xml +9 -0
  2399. data/tests/wellformed/date/feed_modified_rfc2822_rollover_june_31.xml +9 -0
  2400. data/tests/wellformed/date/feed_modified_rfc822.xml +9 -0
  2401. data/tests/wellformed/date/feed_modified_w3dtf_rollover_leapyear.xml +9 -0
  2402. data/tests/wellformed/date/feed_modified_w3dtf_rollover_leapyear400.xml +9 -0
  2403. data/tests/wellformed/date/feed_modified_w3dtf_rollover_nonleapyear.xml +9 -0
  2404. data/tests/wellformed/date/feed_modified_w3dtf_sf.xml +9 -0
  2405. data/tests/wellformed/date/feed_modified_w3dtf_tokyo.xml +9 -0
  2406. data/tests/wellformed/date/feed_modified_w3dtf_utc.xml +9 -0
  2407. data/tests/wellformed/date/feed_modified_w3dtf_y.xml +9 -0
  2408. data/tests/wellformed/date/feed_modified_w3dtf_ym.xml +9 -0
  2409. data/tests/wellformed/date/feed_modified_w3dtf_ymd.xml +9 -0
  2410. data/tests/wellformed/date/feed_modified_w3dtf_ymd_2.xml +9 -0
  2411. data/tests/wellformed/date/feed_updated_w3dtf_utc.xml +7 -0
  2412. data/tests/wellformed/date/item_dc_date.xml +11 -0
  2413. data/tests/wellformed/date/item_dc_date_map_modified.xml +11 -0
  2414. data/tests/wellformed/date/item_dc_date_w3dtf_utc.xml +11 -0
  2415. data/tests/wellformed/date/item_dc_date_w3dtf_utc_map_modified_parsed.xml +11 -0
  2416. data/tests/wellformed/date/item_dcterms_created.xml +11 -0
  2417. data/tests/wellformed/date/item_dcterms_created_w3dtf_utc.xml +11 -0
  2418. data/tests/wellformed/date/item_dcterms_issued.xml +11 -0
  2419. data/tests/wellformed/date/item_dcterms_issued_w3dtf_utc.xml +11 -0
  2420. data/tests/wellformed/date/item_dcterms_modified.xml +11 -0
  2421. data/tests/wellformed/date/item_dcterms_modified_map_date.xml +11 -0
  2422. data/tests/wellformed/date/item_dcterms_modified_w3dtf_utc.xml +11 -0
  2423. data/tests/wellformed/date/item_dcterms_modified_w3dtf_utc_map_date.xml +11 -0
  2424. data/tests/wellformed/date/item_expirationDate.xml +11 -0
  2425. data/tests/wellformed/date/item_expirationDate_rfc2822.xml +11 -0
  2426. data/tests/wellformed/date/item_pubDate.xml +11 -0
  2427. data/tests/wellformed/date/item_pubDate_euc-kr.xml +13 -0
  2428. data/tests/wellformed/date/item_pubDate_map_modified.xml +11 -0
  2429. data/tests/wellformed/date/item_pubDate_rfc2822.xml +11 -0
  2430. data/tests/wellformed/encoding/big5.xml +8 -0
  2431. data/tests/wellformed/encoding/csucs4.xml +0 -0
  2432. data/tests/wellformed/encoding/csunicode.xml +0 -0
  2433. data/tests/wellformed/encoding/encoding_attribute_crash.xml +9 -0
  2434. data/tests/wellformed/encoding/encoding_attribute_crash_2.xml +9 -0
  2435. data/tests/wellformed/encoding/euc-kr-attribute.xml +14 -0
  2436. data/tests/wellformed/encoding/euc-kr-item.xml +14 -0
  2437. data/tests/wellformed/encoding/euc-kr.xml +12 -0
  2438. data/tests/wellformed/encoding/http_application_atom_xml_charset.xml +8 -0
  2439. data/tests/wellformed/encoding/http_application_atom_xml_charset_overrides_encoding.xml +8 -0
  2440. data/tests/wellformed/encoding/http_application_atom_xml_default.xml +8 -0
  2441. data/tests/wellformed/encoding/http_application_atom_xml_encoding.xml +8 -0
  2442. data/tests/wellformed/encoding/http_application_rss_xml_charset.xml +8 -0
  2443. data/tests/wellformed/encoding/http_application_rss_xml_charset_overrides_encoding.xml +8 -0
  2444. data/tests/wellformed/encoding/http_application_rss_xml_default.xml +8 -0
  2445. data/tests/wellformed/encoding/http_application_rss_xml_encoding.xml +8 -0
  2446. data/tests/wellformed/encoding/http_application_xml_charset.xml +8 -0
  2447. data/tests/wellformed/encoding/http_application_xml_charset_overrides_encoding.xml +8 -0
  2448. data/tests/wellformed/encoding/http_application_xml_default.xml +8 -0
  2449. data/tests/wellformed/encoding/http_application_xml_dtd_charset.xml +8 -0
  2450. data/tests/wellformed/encoding/http_application_xml_dtd_charset_overrides_encoding.xml +8 -0
  2451. data/tests/wellformed/encoding/http_application_xml_dtd_default.xml +8 -0
  2452. data/tests/wellformed/encoding/http_application_xml_dtd_encoding.xml +8 -0
  2453. data/tests/wellformed/encoding/http_application_xml_encoding.xml +8 -0
  2454. data/tests/wellformed/encoding/http_application_xml_epe_charset.xml +8 -0
  2455. data/tests/wellformed/encoding/http_application_xml_epe_charset_overrides_encoding.xml +8 -0
  2456. data/tests/wellformed/encoding/http_application_xml_epe_default.xml +8 -0
  2457. data/tests/wellformed/encoding/http_application_xml_epe_encoding.xml +8 -0
  2458. data/tests/wellformed/encoding/http_encoding_attribute_crash.xml +13 -0
  2459. data/tests/wellformed/encoding/http_i18n.xml +13 -0
  2460. data/tests/wellformed/encoding/http_text_atom_xml_charset.xml +8 -0
  2461. data/tests/wellformed/encoding/http_text_atom_xml_charset_overrides_encoding.xml +8 -0
  2462. data/tests/wellformed/encoding/http_text_atom_xml_default.xml +8 -0
  2463. data/tests/wellformed/encoding/http_text_atom_xml_encoding.xml +8 -0
  2464. data/tests/wellformed/encoding/http_text_rss_xml_charset.xml +8 -0
  2465. data/tests/wellformed/encoding/http_text_rss_xml_charset_overrides_encoding.xml +8 -0
  2466. data/tests/wellformed/encoding/http_text_rss_xml_default.xml +8 -0
  2467. data/tests/wellformed/encoding/http_text_rss_xml_encoding.xml +8 -0
  2468. data/tests/wellformed/encoding/http_text_xml_bogus_charset.xml +8 -0
  2469. data/tests/wellformed/encoding/http_text_xml_bogus_param.xml +8 -0
  2470. data/tests/wellformed/encoding/http_text_xml_charset.xml +8 -0
  2471. data/tests/wellformed/encoding/http_text_xml_charset_2.xml +16 -0
  2472. data/tests/wellformed/encoding/http_text_xml_charset_overrides_encoding.xml +8 -0
  2473. data/tests/wellformed/encoding/http_text_xml_charset_overrides_encoding_2.xml +17 -0
  2474. data/tests/wellformed/encoding/http_text_xml_default.xml +8 -0
  2475. data/tests/wellformed/encoding/http_text_xml_epe_charset.xml +8 -0
  2476. data/tests/wellformed/encoding/http_text_xml_epe_charset_overrides_encoding.xml +8 -0
  2477. data/tests/wellformed/encoding/http_text_xml_epe_default.xml +8 -0
  2478. data/tests/wellformed/encoding/http_text_xml_epe_encoding.xml +8 -0
  2479. data/tests/wellformed/encoding/http_text_xml_qs.xml +8 -0
  2480. data/tests/wellformed/encoding/iso-10646-ucs-2.xml +0 -0
  2481. data/tests/wellformed/encoding/iso-10646-ucs-4.xml +0 -0
  2482. data/tests/wellformed/encoding/no_content_type_default.xml +7 -0
  2483. data/tests/wellformed/encoding/no_content_type_encoding.xml +7 -0
  2484. data/tests/wellformed/encoding/u16.xml +0 -0
  2485. data/tests/wellformed/encoding/ucs-2.xml +0 -0
  2486. data/tests/wellformed/encoding/ucs-4.xml +0 -0
  2487. data/tests/wellformed/encoding/utf-16be-autodetect.xml +0 -0
  2488. data/tests/wellformed/encoding/utf-16be-bom.xml +0 -0
  2489. data/tests/wellformed/encoding/utf-16be.xml +0 -0
  2490. data/tests/wellformed/encoding/utf-16le-autodetect.xml +0 -0
  2491. data/tests/wellformed/encoding/utf-16le-bom.xml +0 -0
  2492. data/tests/wellformed/encoding/utf-16le.xml +0 -0
  2493. data/tests/wellformed/encoding/utf-32be-autodetect.xml +0 -0
  2494. data/tests/wellformed/encoding/utf-32be-bom.xml +0 -0
  2495. data/tests/wellformed/encoding/utf-32be.xml +0 -0
  2496. data/tests/wellformed/encoding/utf-32le-autodetect.xml +0 -0
  2497. data/tests/wellformed/encoding/utf-32le-bom.xml +0 -0
  2498. data/tests/wellformed/encoding/utf-32le.xml +0 -0
  2499. data/tests/wellformed/encoding/utf-8-bom.xml +8 -0
  2500. data/tests/wellformed/encoding/utf16.xml +0 -0
  2501. data/tests/wellformed/encoding/utf_16.xml +0 -0
  2502. data/tests/wellformed/encoding/utf_32.xml +0 -0
  2503. data/tests/wellformed/encoding/x80_437.xml +9 -0
  2504. data/tests/wellformed/encoding/x80_850.xml +9 -0
  2505. data/tests/wellformed/encoding/x80_852.xml +9 -0
  2506. data/tests/wellformed/encoding/x80_855.xml +9 -0
  2507. data/tests/wellformed/encoding/x80_857.xml +9 -0
  2508. data/tests/wellformed/encoding/x80_860.xml +9 -0
  2509. data/tests/wellformed/encoding/x80_861.xml +9 -0
  2510. data/tests/wellformed/encoding/x80_862.xml +9 -0
  2511. data/tests/wellformed/encoding/x80_863.xml +9 -0
  2512. data/tests/wellformed/encoding/x80_865.xml +9 -0
  2513. data/tests/wellformed/encoding/x80_866.xml +9 -0
  2514. data/tests/wellformed/encoding/x80_cp037.xml +1 -0
  2515. data/tests/wellformed/encoding/x80_cp1125.xml +9 -0
  2516. data/tests/wellformed/encoding/x80_cp1250.xml +9 -0
  2517. data/tests/wellformed/encoding/x80_cp1251.xml +9 -0
  2518. data/tests/wellformed/encoding/x80_cp1252.xml +9 -0
  2519. data/tests/wellformed/encoding/x80_cp1253.xml +9 -0
  2520. data/tests/wellformed/encoding/x80_cp1254.xml +9 -0
  2521. data/tests/wellformed/encoding/x80_cp1255.xml +9 -0
  2522. data/tests/wellformed/encoding/x80_cp1256.xml +9 -0
  2523. data/tests/wellformed/encoding/x80_cp1257.xml +9 -0
  2524. data/tests/wellformed/encoding/x80_cp1258.xml +9 -0
  2525. data/tests/wellformed/encoding/x80_cp437.xml +9 -0
  2526. data/tests/wellformed/encoding/x80_cp500.xml +1 -0
  2527. data/tests/wellformed/encoding/x80_cp737.xml +9 -0
  2528. data/tests/wellformed/encoding/x80_cp775.xml +9 -0
  2529. data/tests/wellformed/encoding/x80_cp850.xml +9 -0
  2530. data/tests/wellformed/encoding/x80_cp852.xml +9 -0
  2531. data/tests/wellformed/encoding/x80_cp855.xml +9 -0
  2532. data/tests/wellformed/encoding/x80_cp856.xml +9 -0
  2533. data/tests/wellformed/encoding/x80_cp857.xml +9 -0
  2534. data/tests/wellformed/encoding/x80_cp860.xml +9 -0
  2535. data/tests/wellformed/encoding/x80_cp861.xml +9 -0
  2536. data/tests/wellformed/encoding/x80_cp862.xml +9 -0
  2537. data/tests/wellformed/encoding/x80_cp863.xml +9 -0
  2538. data/tests/wellformed/encoding/x80_cp864.xml +9 -0
  2539. data/tests/wellformed/encoding/x80_cp865.xml +9 -0
  2540. data/tests/wellformed/encoding/x80_cp866.xml +9 -0
  2541. data/tests/wellformed/encoding/x80_cp874.xml +9 -0
  2542. data/tests/wellformed/encoding/x80_cp875.xml +1 -0
  2543. data/tests/wellformed/encoding/x80_cp_is.xml +9 -0
  2544. data/tests/wellformed/encoding/x80_csibm037.xml +1 -0
  2545. data/tests/wellformed/encoding/x80_csibm500.xml +1 -0
  2546. data/tests/wellformed/encoding/x80_csibm855.xml +9 -0
  2547. data/tests/wellformed/encoding/x80_csibm857.xml +9 -0
  2548. data/tests/wellformed/encoding/x80_csibm860.xml +9 -0
  2549. data/tests/wellformed/encoding/x80_csibm861.xml +9 -0
  2550. data/tests/wellformed/encoding/x80_csibm863.xml +9 -0
  2551. data/tests/wellformed/encoding/x80_csibm864.xml +9 -0
  2552. data/tests/wellformed/encoding/x80_csibm865.xml +9 -0
  2553. data/tests/wellformed/encoding/x80_csibm866.xml +9 -0
  2554. data/tests/wellformed/encoding/x80_cskoi8r.xml +9 -0
  2555. data/tests/wellformed/encoding/x80_csmacintosh.xml +9 -0
  2556. data/tests/wellformed/encoding/x80_cspc775baltic.xml +9 -0
  2557. data/tests/wellformed/encoding/x80_cspc850multilingual.xml +9 -0
  2558. data/tests/wellformed/encoding/x80_cspc862latinhebrew.xml +9 -0
  2559. data/tests/wellformed/encoding/x80_cspc8codepage437.xml +9 -0
  2560. data/tests/wellformed/encoding/x80_cspcp852.xml +9 -0
  2561. data/tests/wellformed/encoding/x80_dbcs.xml +9 -0
  2562. data/tests/wellformed/encoding/x80_ebcdic-cp-be.xml +1 -0
  2563. data/tests/wellformed/encoding/x80_ebcdic-cp-ca.xml +1 -0
  2564. data/tests/wellformed/encoding/x80_ebcdic-cp-ch.xml +1 -0
  2565. data/tests/wellformed/encoding/x80_ebcdic-cp-nl.xml +1 -0
  2566. data/tests/wellformed/encoding/x80_ebcdic-cp-us.xml +1 -0
  2567. data/tests/wellformed/encoding/x80_ebcdic-cp-wt.xml +1 -0
  2568. data/tests/wellformed/encoding/x80_ebcdic_cp_be.xml +1 -0
  2569. data/tests/wellformed/encoding/x80_ebcdic_cp_ca.xml +1 -0
  2570. data/tests/wellformed/encoding/x80_ebcdic_cp_ch.xml +1 -0
  2571. data/tests/wellformed/encoding/x80_ebcdic_cp_nl.xml +1 -0
  2572. data/tests/wellformed/encoding/x80_ebcdic_cp_us.xml +1 -0
  2573. data/tests/wellformed/encoding/x80_ebcdic_cp_wt.xml +1 -0
  2574. data/tests/wellformed/encoding/x80_ibm037.xml +1 -0
  2575. data/tests/wellformed/encoding/x80_ibm039.xml +1 -0
  2576. data/tests/wellformed/encoding/x80_ibm1140.xml +1 -0
  2577. data/tests/wellformed/encoding/x80_ibm437.xml +9 -0
  2578. data/tests/wellformed/encoding/x80_ibm500.xml +1 -0
  2579. data/tests/wellformed/encoding/x80_ibm775.xml +9 -0
  2580. data/tests/wellformed/encoding/x80_ibm850.xml +9 -0
  2581. data/tests/wellformed/encoding/x80_ibm852.xml +9 -0
  2582. data/tests/wellformed/encoding/x80_ibm855.xml +9 -0
  2583. data/tests/wellformed/encoding/x80_ibm857.xml +9 -0
  2584. data/tests/wellformed/encoding/x80_ibm860.xml +9 -0
  2585. data/tests/wellformed/encoding/x80_ibm861.xml +9 -0
  2586. data/tests/wellformed/encoding/x80_ibm862.xml +9 -0
  2587. data/tests/wellformed/encoding/x80_ibm863.xml +9 -0
  2588. data/tests/wellformed/encoding/x80_ibm864.xml +9 -0
  2589. data/tests/wellformed/encoding/x80_ibm865.xml +9 -0
  2590. data/tests/wellformed/encoding/x80_ibm866.xml +9 -0
  2591. data/tests/wellformed/encoding/x80_koi8-r.xml +9 -0
  2592. data/tests/wellformed/encoding/x80_koi8-t.xml +9 -0
  2593. data/tests/wellformed/encoding/x80_koi8-u.xml +9 -0
  2594. data/tests/wellformed/encoding/x80_mac-cyrillic.xml +9 -0
  2595. data/tests/wellformed/encoding/x80_mac.xml +9 -0
  2596. data/tests/wellformed/encoding/x80_maccentraleurope.xml +9 -0
  2597. data/tests/wellformed/encoding/x80_maccyrillic.xml +9 -0
  2598. data/tests/wellformed/encoding/x80_macgreek.xml +9 -0
  2599. data/tests/wellformed/encoding/x80_maciceland.xml +9 -0
  2600. data/tests/wellformed/encoding/x80_macintosh.xml +9 -0
  2601. data/tests/wellformed/encoding/x80_maclatin2.xml +9 -0
  2602. data/tests/wellformed/encoding/x80_macroman.xml +9 -0
  2603. data/tests/wellformed/encoding/x80_macturkish.xml +9 -0
  2604. data/tests/wellformed/encoding/x80_ms-ansi.xml +9 -0
  2605. data/tests/wellformed/encoding/x80_ms-arab.xml +9 -0
  2606. data/tests/wellformed/encoding/x80_ms-cyrl.xml +9 -0
  2607. data/tests/wellformed/encoding/x80_ms-ee.xml +9 -0
  2608. data/tests/wellformed/encoding/x80_ms-greek.xml +9 -0
  2609. data/tests/wellformed/encoding/x80_ms-hebr.xml +9 -0
  2610. data/tests/wellformed/encoding/x80_ms-turk.xml +9 -0
  2611. data/tests/wellformed/encoding/x80_tcvn-5712.xml +9 -0
  2612. data/tests/wellformed/encoding/x80_tcvn.xml +9 -0
  2613. data/tests/wellformed/encoding/x80_tcvn5712-1.xml +9 -0
  2614. data/tests/wellformed/encoding/x80_viscii.xml +9 -0
  2615. data/tests/wellformed/encoding/x80_winbaltrim.xml +9 -0
  2616. data/tests/wellformed/encoding/x80_windows-1250.xml +9 -0
  2617. data/tests/wellformed/encoding/x80_windows-1251.xml +9 -0
  2618. data/tests/wellformed/encoding/x80_windows-1252.xml +9 -0
  2619. data/tests/wellformed/encoding/x80_windows-1253.xml +9 -0
  2620. data/tests/wellformed/encoding/x80_windows-1254.xml +9 -0
  2621. data/tests/wellformed/encoding/x80_windows-1255.xml +9 -0
  2622. data/tests/wellformed/encoding/x80_windows-1256.xml +9 -0
  2623. data/tests/wellformed/encoding/x80_windows-1257.xml +9 -0
  2624. data/tests/wellformed/encoding/x80_windows-1258.xml +9 -0
  2625. data/tests/wellformed/encoding/x80_windows_1250.xml +9 -0
  2626. data/tests/wellformed/encoding/x80_windows_1251.xml +9 -0
  2627. data/tests/wellformed/encoding/x80_windows_1252.xml +9 -0
  2628. data/tests/wellformed/encoding/x80_windows_1253.xml +9 -0
  2629. data/tests/wellformed/encoding/x80_windows_1254.xml +9 -0
  2630. data/tests/wellformed/encoding/x80_windows_1255.xml +9 -0
  2631. data/tests/wellformed/encoding/x80_windows_1256.xml +9 -0
  2632. data/tests/wellformed/encoding/x80_windows_1257.xml +9 -0
  2633. data/tests/wellformed/encoding/x80_windows_1258.xml +9 -0
  2634. data/tests/wellformed/feedburner/feedburner_browserfriendly.xml +9 -0
  2635. data/tests/wellformed/http/headers_foo.xml +7 -0
  2636. data/tests/wellformed/itunes/itunes_channel_block.xml +9 -0
  2637. data/tests/wellformed/itunes/itunes_channel_block_false.xml +9 -0
  2638. data/tests/wellformed/itunes/itunes_channel_block_no.xml +9 -0
  2639. data/tests/wellformed/itunes/itunes_channel_block_true.xml +9 -0
  2640. data/tests/wellformed/itunes/itunes_channel_block_uppercase.xml +9 -0
  2641. data/tests/wellformed/itunes/itunes_channel_block_whitespace.xml +9 -0
  2642. data/tests/wellformed/itunes/itunes_channel_category.xml +9 -0
  2643. data/tests/wellformed/itunes/itunes_channel_category_nested.xml +11 -0
  2644. data/tests/wellformed/itunes/itunes_channel_category_scheme.xml +9 -0
  2645. data/tests/wellformed/itunes/itunes_channel_explicit.xml +9 -0
  2646. data/tests/wellformed/itunes/itunes_channel_explicit_false.xml +9 -0
  2647. data/tests/wellformed/itunes/itunes_channel_explicit_no.xml +9 -0
  2648. data/tests/wellformed/itunes/itunes_channel_explicit_true.xml +9 -0
  2649. data/tests/wellformed/itunes/itunes_channel_explicit_uppercase.xml +9 -0
  2650. data/tests/wellformed/itunes/itunes_channel_explicit_whitespace.xml +9 -0
  2651. data/tests/wellformed/itunes/itunes_channel_image.xml +9 -0
  2652. data/tests/wellformed/itunes/itunes_channel_keywords.xml +9 -0
  2653. data/tests/wellformed/itunes/itunes_channel_keywords_duplicate.xml +9 -0
  2654. data/tests/wellformed/itunes/itunes_channel_keywords_duplicate_2.xml +10 -0
  2655. data/tests/wellformed/itunes/itunes_channel_keywords_multiple.xml +9 -0
  2656. data/tests/wellformed/itunes/itunes_channel_link_image.xml +9 -0
  2657. data/tests/wellformed/itunes/itunes_channel_owner_email.xml +12 -0
  2658. data/tests/wellformed/itunes/itunes_channel_owner_name.xml +12 -0
  2659. data/tests/wellformed/itunes/itunes_channel_subtitle.xml +9 -0
  2660. data/tests/wellformed/itunes/itunes_channel_summary.xml +9 -0
  2661. data/tests/wellformed/itunes/itunes_core_element_uppercase.xml +9 -0
  2662. data/tests/wellformed/itunes/itunes_enclosure_url_maps_id.xml +11 -0
  2663. data/tests/wellformed/itunes/itunes_enclosure_url_maps_id_2.xml +12 -0
  2664. data/tests/wellformed/itunes/itunes_item_author_map_author.xml +11 -0
  2665. data/tests/wellformed/itunes/itunes_item_block.xml +11 -0
  2666. data/tests/wellformed/itunes/itunes_item_block_false.xml +11 -0
  2667. data/tests/wellformed/itunes/itunes_item_block_no.xml +11 -0
  2668. data/tests/wellformed/itunes/itunes_item_block_true.xml +11 -0
  2669. data/tests/wellformed/itunes/itunes_item_block_uppercase.xml +11 -0
  2670. data/tests/wellformed/itunes/itunes_item_block_whitespace.xml +11 -0
  2671. data/tests/wellformed/itunes/itunes_item_category.xml +11 -0
  2672. data/tests/wellformed/itunes/itunes_item_category_nested.xml +13 -0
  2673. data/tests/wellformed/itunes/itunes_item_category_scheme.xml +11 -0
  2674. data/tests/wellformed/itunes/itunes_item_duration.xml +11 -0
  2675. data/tests/wellformed/itunes/itunes_item_explicit.xml +11 -0
  2676. data/tests/wellformed/itunes/itunes_item_explicit_false.xml +11 -0
  2677. data/tests/wellformed/itunes/itunes_item_explicit_no.xml +11 -0
  2678. data/tests/wellformed/itunes/itunes_item_explicit_true.xml +11 -0
  2679. data/tests/wellformed/itunes/itunes_item_explicit_uppercase.xml +11 -0
  2680. data/tests/wellformed/itunes/itunes_item_explicit_whitespace.xml +11 -0
  2681. data/tests/wellformed/itunes/itunes_item_image.xml +11 -0
  2682. data/tests/wellformed/itunes/itunes_item_link_image.xml +11 -0
  2683. data/tests/wellformed/itunes/itunes_item_subtitle.xml +11 -0
  2684. data/tests/wellformed/itunes/itunes_item_summary.xml +11 -0
  2685. data/tests/wellformed/itunes/itunes_link_enclosure_maps_id.xml +9 -0
  2686. data/tests/wellformed/itunes/itunes_link_enclosure_maps_id_2.xml +10 -0
  2687. data/tests/wellformed/itunes/itunes_namespace.xml +9 -0
  2688. data/tests/wellformed/itunes/itunes_namespace_example.xml +9 -0
  2689. data/tests/wellformed/itunes/itunes_namespace_lowercase.xml +9 -0
  2690. data/tests/wellformed/itunes/itunes_namespace_uppercase.xml +9 -0
  2691. data/tests/wellformed/lang/channel_dc_language.xml +9 -0
  2692. data/tests/wellformed/lang/channel_language.xml +9 -0
  2693. data/tests/wellformed/lang/entry_content_xml_lang.xml +9 -0
  2694. data/tests/wellformed/lang/entry_content_xml_lang_blank.xml +9 -0
  2695. data/tests/wellformed/lang/entry_content_xml_lang_blank_2.xml +9 -0
  2696. data/tests/wellformed/lang/entry_content_xml_lang_blank_3.xml +12 -0
  2697. data/tests/wellformed/lang/entry_content_xml_lang_inherit.xml +9 -0
  2698. data/tests/wellformed/lang/entry_content_xml_lang_inherit_2.xml +9 -0
  2699. data/tests/wellformed/lang/entry_content_xml_lang_inherit_3.xml +10 -0
  2700. data/tests/wellformed/lang/entry_content_xml_lang_inherit_4.xml +10 -0
  2701. data/tests/wellformed/lang/entry_summary_xml_lang.xml +9 -0
  2702. data/tests/wellformed/lang/entry_summary_xml_lang_blank.xml +9 -0
  2703. data/tests/wellformed/lang/entry_summary_xml_lang_inherit.xml +9 -0
  2704. data/tests/wellformed/lang/entry_summary_xml_lang_inherit_2.xml +9 -0
  2705. data/tests/wellformed/lang/entry_summary_xml_lang_inherit_3.xml +10 -0
  2706. data/tests/wellformed/lang/entry_summary_xml_lang_inherit_4.xml +10 -0
  2707. data/tests/wellformed/lang/entry_title_xml_lang.xml +9 -0
  2708. data/tests/wellformed/lang/entry_title_xml_lang_blank.xml +9 -0
  2709. data/tests/wellformed/lang/entry_title_xml_lang_inherit.xml +9 -0
  2710. data/tests/wellformed/lang/entry_title_xml_lang_inherit_2.xml +9 -0
  2711. data/tests/wellformed/lang/entry_title_xml_lang_inherit_3.xml +10 -0
  2712. data/tests/wellformed/lang/entry_title_xml_lang_inherit_4.xml +10 -0
  2713. data/tests/wellformed/lang/feed_copyright_xml_lang.xml +7 -0
  2714. data/tests/wellformed/lang/feed_copyright_xml_lang_blank.xml +7 -0
  2715. data/tests/wellformed/lang/feed_copyright_xml_lang_inherit.xml +7 -0
  2716. data/tests/wellformed/lang/feed_copyright_xml_lang_inherit_2.xml +7 -0
  2717. data/tests/wellformed/lang/feed_copyright_xml_lang_inherit_3.xml +8 -0
  2718. data/tests/wellformed/lang/feed_copyright_xml_lang_inherit_4.xml +8 -0
  2719. data/tests/wellformed/lang/feed_info_xml_lang.xml +7 -0
  2720. data/tests/wellformed/lang/feed_info_xml_lang_blank.xml +7 -0
  2721. data/tests/wellformed/lang/feed_info_xml_lang_inherit.xml +7 -0
  2722. data/tests/wellformed/lang/feed_info_xml_lang_inherit_2.xml +7 -0
  2723. data/tests/wellformed/lang/feed_info_xml_lang_inherit_3.xml +8 -0
  2724. data/tests/wellformed/lang/feed_info_xml_lang_inherit_4.xml +8 -0
  2725. data/tests/wellformed/lang/feed_language.xml +9 -0
  2726. data/tests/wellformed/lang/feed_language_override.xml +9 -0
  2727. data/tests/wellformed/lang/feed_not_xml_lang.xml +7 -0
  2728. data/tests/wellformed/lang/feed_not_xml_lang_2.xml +7 -0
  2729. data/tests/wellformed/lang/feed_tagline_xml_lang.xml +7 -0
  2730. data/tests/wellformed/lang/feed_tagline_xml_lang_blank.xml +7 -0
  2731. data/tests/wellformed/lang/feed_tagline_xml_lang_inherit.xml +7 -0
  2732. data/tests/wellformed/lang/feed_tagline_xml_lang_inherit_2.xml +7 -0
  2733. data/tests/wellformed/lang/feed_tagline_xml_lang_inherit_3.xml +8 -0
  2734. data/tests/wellformed/lang/feed_tagline_xml_lang_inherit_4.xml +8 -0
  2735. data/tests/wellformed/lang/feed_title_xml_lang.xml +7 -0
  2736. data/tests/wellformed/lang/feed_title_xml_lang_blank.xml +7 -0
  2737. data/tests/wellformed/lang/feed_title_xml_lang_inherit.xml +7 -0
  2738. data/tests/wellformed/lang/feed_title_xml_lang_inherit_2.xml +7 -0
  2739. data/tests/wellformed/lang/feed_title_xml_lang_inherit_3.xml +8 -0
  2740. data/tests/wellformed/lang/feed_title_xml_lang_inherit_4.xml +8 -0
  2741. data/tests/wellformed/lang/feed_xml_lang.xml +6 -0
  2742. data/tests/wellformed/lang/http_content_language.xml +7 -0
  2743. data/tests/wellformed/lang/http_content_language_entry_title_inherit.xml +10 -0
  2744. data/tests/wellformed/lang/http_content_language_entry_title_inherit_2.xml +11 -0
  2745. data/tests/wellformed/lang/http_content_language_feed_language.xml +10 -0
  2746. data/tests/wellformed/lang/http_content_language_feed_xml_lang.xml +7 -0
  2747. data/tests/wellformed/lang/item_content_encoded_xml_lang.xml +11 -0
  2748. data/tests/wellformed/lang/item_content_encoded_xml_lang_inherit.xml +11 -0
  2749. data/tests/wellformed/lang/item_dc_language.xml +11 -0
  2750. data/tests/wellformed/lang/item_fullitem_xml_lang.xml +11 -0
  2751. data/tests/wellformed/lang/item_fullitem_xml_lang_inherit.xml +11 -0
  2752. data/tests/wellformed/lang/item_xhtml_body_xml_lang.xml +13 -0
  2753. data/tests/wellformed/lang/item_xhtml_body_xml_lang_inherit.xml +13 -0
  2754. data/tests/wellformed/namespace/rss1.0withModules.xml +47 -0
  2755. data/tests/wellformed/namespace/rss1.0withModulesNoDefNS.xml +48 -0
  2756. data/tests/wellformed/namespace/rss1.0withModulesNoDefNSLocalNameClash.xml +53 -0
  2757. data/tests/wellformed/namespace/rss2.0NSwithModules.xml +50 -0
  2758. data/tests/wellformed/namespace/rss2.0NSwithModulesNoDefNS.xml +50 -0
  2759. data/tests/wellformed/namespace/rss2.0NSwithModulesNoDefNSLocalNameClash.xml +58 -0
  2760. data/tests/wellformed/namespace/rss2.0noNSwithModules.xml +49 -0
  2761. data/tests/wellformed/namespace/rss2.0noNSwithModulesLocalNameClash.xml +57 -0
  2762. data/tests/wellformed/rdf/doctype_contains_entity_decl.xml +17 -0
  2763. data/tests/wellformed/rdf/rdf_channel_description.xml +9 -0
  2764. data/tests/wellformed/rdf/rdf_channel_link.xml +9 -0
  2765. data/tests/wellformed/rdf/rdf_channel_title.xml +9 -0
  2766. data/tests/wellformed/rdf/rdf_item_description.xml +16 -0
  2767. data/tests/wellformed/rdf/rdf_item_link.xml +16 -0
  2768. data/tests/wellformed/rdf/rdf_item_rdf_about.xml +15 -0
  2769. data/tests/wellformed/rdf/rdf_item_title.xml +16 -0
  2770. data/tests/wellformed/rdf/rss090_channel_title.xml +12 -0
  2771. data/tests/wellformed/rdf/rss090_item_title.xml +12 -0
  2772. data/tests/wellformed/rdf/rss_version_10.xml +6 -0
  2773. data/tests/wellformed/rdf/rss_version_10_not_default_ns.xml +8 -0
  2774. data/tests/wellformed/rss/aaa_wellformed.xml +6 -0
  2775. data/tests/wellformed/rss/channel_author.xml +9 -0
  2776. data/tests/wellformed/rss/channel_author_map_author_detail_email.xml +9 -0
  2777. data/tests/wellformed/rss/channel_author_map_author_detail_email_2.xml +9 -0
  2778. data/tests/wellformed/rss/channel_author_map_author_detail_email_3.xml +9 -0
  2779. data/tests/wellformed/rss/channel_author_map_author_detail_name.xml +9 -0
  2780. data/tests/wellformed/rss/channel_author_map_author_detail_name_2.xml +9 -0
  2781. data/tests/wellformed/rss/channel_category.xml +9 -0
  2782. data/tests/wellformed/rss/channel_category_domain.xml +9 -0
  2783. data/tests/wellformed/rss/channel_category_multiple.xml +10 -0
  2784. data/tests/wellformed/rss/channel_category_multiple_2.xml +10 -0
  2785. data/tests/wellformed/rss/channel_cloud_domain.xml +9 -0
  2786. data/tests/wellformed/rss/channel_cloud_path.xml +9 -0
  2787. data/tests/wellformed/rss/channel_cloud_port.xml +9 -0
  2788. data/tests/wellformed/rss/channel_cloud_protocol.xml +9 -0
  2789. data/tests/wellformed/rss/channel_cloud_registerProcedure.xml +9 -0
  2790. data/tests/wellformed/rss/channel_copyright.xml +9 -0
  2791. data/tests/wellformed/rss/channel_dc_author.xml +9 -0
  2792. data/tests/wellformed/rss/channel_dc_author_map_author_detail_email.xml +9 -0
  2793. data/tests/wellformed/rss/channel_dc_author_map_author_detail_name.xml +9 -0
  2794. data/tests/wellformed/rss/channel_dc_contributor.xml +9 -0
  2795. data/tests/wellformed/rss/channel_dc_creator.xml +9 -0
  2796. data/tests/wellformed/rss/channel_dc_creator_map_author_detail_email.xml +9 -0
  2797. data/tests/wellformed/rss/channel_dc_creator_map_author_detail_name.xml +9 -0
  2798. data/tests/wellformed/rss/channel_dc_publisher.xml +9 -0
  2799. data/tests/wellformed/rss/channel_dc_publisher_email.xml +9 -0
  2800. data/tests/wellformed/rss/channel_dc_publisher_name.xml +9 -0
  2801. data/tests/wellformed/rss/channel_dc_rights.xml +9 -0
  2802. data/tests/wellformed/rss/channel_dc_subject.xml +9 -0
  2803. data/tests/wellformed/rss/channel_dc_subject_2.xml +9 -0
  2804. data/tests/wellformed/rss/channel_dc_subject_multiple.xml +10 -0
  2805. data/tests/wellformed/rss/channel_dc_title.xml +9 -0
  2806. data/tests/wellformed/rss/channel_description.xml +9 -0
  2807. data/tests/wellformed/rss/channel_description_escaped_markup.xml +9 -0
  2808. data/tests/wellformed/rss/channel_description_map_tagline.xml +9 -0
  2809. data/tests/wellformed/rss/channel_description_naked_markup.xml +9 -0
  2810. data/tests/wellformed/rss/channel_description_shorttag.xml +10 -0
  2811. data/tests/wellformed/rss/channel_docs.xml +9 -0
  2812. data/tests/wellformed/rss/channel_generator.xml +9 -0
  2813. data/tests/wellformed/rss/channel_image_description.xml +16 -0
  2814. data/tests/wellformed/rss/channel_image_height.xml +16 -0
  2815. data/tests/wellformed/rss/channel_image_link.xml +16 -0
  2816. data/tests/wellformed/rss/channel_image_link_conflict.xml +12 -0
  2817. data/tests/wellformed/rss/channel_image_title.xml +16 -0
  2818. data/tests/wellformed/rss/channel_image_title_conflict.xml +12 -0
  2819. data/tests/wellformed/rss/channel_image_url.xml +16 -0
  2820. data/tests/wellformed/rss/channel_image_width.xml +16 -0
  2821. data/tests/wellformed/rss/channel_link.xml +9 -0
  2822. data/tests/wellformed/rss/channel_managingEditor.xml +9 -0
  2823. data/tests/wellformed/rss/channel_managingEditor_map_author_detail_email.xml +9 -0
  2824. data/tests/wellformed/rss/channel_managingEditor_map_author_detail_name.xml +9 -0
  2825. data/tests/wellformed/rss/channel_textInput_description.xml +14 -0
  2826. data/tests/wellformed/rss/channel_textInput_description_conflict.xml +12 -0
  2827. data/tests/wellformed/rss/channel_textInput_link.xml +12 -0
  2828. data/tests/wellformed/rss/channel_textInput_link_conflict.xml +12 -0
  2829. data/tests/wellformed/rss/channel_textInput_name.xml +11 -0
  2830. data/tests/wellformed/rss/channel_textInput_title.xml +12 -0
  2831. data/tests/wellformed/rss/channel_textInput_title_conflict.xml +12 -0
  2832. data/tests/wellformed/rss/channel_title.xml +9 -0
  2833. data/tests/wellformed/rss/channel_title_apos.xml +9 -0
  2834. data/tests/wellformed/rss/channel_title_gt.xml +9 -0
  2835. data/tests/wellformed/rss/channel_title_lt.xml +9 -0
  2836. data/tests/wellformed/rss/channel_ttl.xml +9 -0
  2837. data/tests/wellformed/rss/channel_webMaster.xml +9 -0
  2838. data/tests/wellformed/rss/channel_webMaster_email.xml +9 -0
  2839. data/tests/wellformed/rss/channel_webMaster_name.xml +9 -0
  2840. data/tests/wellformed/rss/item_author.xml +11 -0
  2841. data/tests/wellformed/rss/item_author_map_author_detail_email.xml +11 -0
  2842. data/tests/wellformed/rss/item_author_map_author_detail_name.xml +11 -0
  2843. data/tests/wellformed/rss/item_category.xml +11 -0
  2844. data/tests/wellformed/rss/item_category_domain.xml +11 -0
  2845. data/tests/wellformed/rss/item_category_multiple.xml +12 -0
  2846. data/tests/wellformed/rss/item_category_multiple_2.xml +12 -0
  2847. data/tests/wellformed/rss/item_comments.xml +11 -0
  2848. data/tests/wellformed/rss/item_content_encoded.xml +11 -0
  2849. data/tests/wellformed/rss/item_content_encoded_mode.xml +11 -0
  2850. data/tests/wellformed/rss/item_content_encoded_type.xml +11 -0
  2851. data/tests/wellformed/rss/item_dc_author.xml +11 -0
  2852. data/tests/wellformed/rss/item_dc_author_map_author_detail_email.xml +11 -0
  2853. data/tests/wellformed/rss/item_dc_author_map_author_detail_name.xml +11 -0
  2854. data/tests/wellformed/rss/item_dc_contributor.xml +11 -0
  2855. data/tests/wellformed/rss/item_dc_creator.xml +11 -0
  2856. data/tests/wellformed/rss/item_dc_creator_map_author_detail_email.xml +11 -0
  2857. data/tests/wellformed/rss/item_dc_creator_map_author_detail_name.xml +11 -0
  2858. data/tests/wellformed/rss/item_dc_publisher.xml +11 -0
  2859. data/tests/wellformed/rss/item_dc_publisher_email.xml +11 -0
  2860. data/tests/wellformed/rss/item_dc_publisher_name.xml +11 -0
  2861. data/tests/wellformed/rss/item_dc_rights.xml +11 -0
  2862. data/tests/wellformed/rss/item_dc_subject.xml +11 -0
  2863. data/tests/wellformed/rss/item_dc_subject_2.xml +11 -0
  2864. data/tests/wellformed/rss/item_dc_subject_multiple.xml +12 -0
  2865. data/tests/wellformed/rss/item_dc_title.xml +11 -0
  2866. data/tests/wellformed/rss/item_description.xml +11 -0
  2867. data/tests/wellformed/rss/item_description_and_summary.xml +12 -0
  2868. data/tests/wellformed/rss/item_description_br.xml +11 -0
  2869. data/tests/wellformed/rss/item_description_br_shorttag.xml +12 -0
  2870. data/tests/wellformed/rss/item_description_escaped_markup.xml +11 -0
  2871. data/tests/wellformed/rss/item_description_map_summary.xml +11 -0
  2872. data/tests/wellformed/rss/item_description_naked_markup.xml +11 -0
  2873. data/tests/wellformed/rss/item_description_not_a_doctype.xml +9 -0
  2874. data/tests/wellformed/rss/item_enclosure_length.xml +12 -0
  2875. data/tests/wellformed/rss/item_enclosure_multiple.xml +13 -0
  2876. data/tests/wellformed/rss/item_enclosure_type.xml +12 -0
  2877. data/tests/wellformed/rss/item_enclosure_url.xml +12 -0
  2878. data/tests/wellformed/rss/item_fullitem.xml +11 -0
  2879. data/tests/wellformed/rss/item_fullitem_mode.xml +11 -0
  2880. data/tests/wellformed/rss/item_fullitem_type.xml +11 -0
  2881. data/tests/wellformed/rss/item_guid.xml +11 -0
  2882. data/tests/wellformed/rss/item_guid_conflict_link.xml +12 -0
  2883. data/tests/wellformed/rss/item_guid_guidislink.xml +11 -0
  2884. data/tests/wellformed/rss/item_guid_isPermaLink_conflict_link.xml +12 -0
  2885. data/tests/wellformed/rss/item_guid_isPermaLink_conflict_link_not_guidislink.xml +12 -0
  2886. data/tests/wellformed/rss/item_guid_isPermaLink_guidislink.xml +11 -0
  2887. data/tests/wellformed/rss/item_guid_isPermaLink_map_link.xml +11 -0
  2888. data/tests/wellformed/rss/item_guid_map_link.xml +11 -0
  2889. data/tests/wellformed/rss/item_guid_not_permalink.xml +11 -0
  2890. data/tests/wellformed/rss/item_guid_not_permalink_conflict_link.xml +12 -0
  2891. data/tests/wellformed/rss/item_guid_not_permalink_not_guidislink.xml +11 -0
  2892. data/tests/wellformed/rss/item_guid_not_permalink_not_guidislink_2.xml +12 -0
  2893. data/tests/wellformed/rss/item_link.xml +11 -0
  2894. data/tests/wellformed/rss/item_source.xml +12 -0
  2895. data/tests/wellformed/rss/item_source_url.xml +12 -0
  2896. data/tests/wellformed/rss/item_summary_and_description.xml +12 -0
  2897. data/tests/wellformed/rss/item_title.xml +11 -0
  2898. data/tests/wellformed/rss/item_xhtml_body.xml +13 -0
  2899. data/tests/wellformed/rss/item_xhtml_body_mode.xml +13 -0
  2900. data/tests/wellformed/rss/item_xhtml_body_type.xml +13 -0
  2901. data/tests/wellformed/rss/rss_namespace_1.xml +9 -0
  2902. data/tests/wellformed/rss/rss_namespace_2.xml +9 -0
  2903. data/tests/wellformed/rss/rss_namespace_3.xml +9 -0
  2904. data/tests/wellformed/rss/rss_namespace_4.xml +9 -0
  2905. data/tests/wellformed/rss/rss_version_090.xml +6 -0
  2906. data/tests/wellformed/rss/rss_version_091_netscape.xml +7 -0
  2907. data/tests/wellformed/rss/rss_version_091_userland.xml +6 -0
  2908. data/tests/wellformed/rss/rss_version_092.xml +6 -0
  2909. data/tests/wellformed/rss/rss_version_093.xml +6 -0
  2910. data/tests/wellformed/rss/rss_version_094.xml +6 -0
  2911. data/tests/wellformed/rss/rss_version_20.xml +6 -0
  2912. data/tests/wellformed/rss/rss_version_201.xml +6 -0
  2913. data/tests/wellformed/rss/rss_version_21.xml +6 -0
  2914. data/tests/wellformed/rss/rss_version_missing.xml +9 -0
  2915. data/tests/wellformed/sanitize/entry_content_applet.xml +9 -0
  2916. data/tests/wellformed/sanitize/entry_content_blink.xml +9 -0
  2917. data/tests/wellformed/sanitize/entry_content_crazy.xml +75 -0
  2918. data/tests/wellformed/sanitize/entry_content_embed.xml +9 -0
  2919. data/tests/wellformed/sanitize/entry_content_frame.xml +9 -0
  2920. data/tests/wellformed/sanitize/entry_content_iframe.xml +9 -0
  2921. data/tests/wellformed/sanitize/entry_content_link.xml +9 -0
  2922. data/tests/wellformed/sanitize/entry_content_meta.xml +9 -0
  2923. data/tests/wellformed/sanitize/entry_content_object.xml +9 -0
  2924. data/tests/wellformed/sanitize/entry_content_onabort.xml +9 -0
  2925. data/tests/wellformed/sanitize/entry_content_onblur.xml +9 -0
  2926. data/tests/wellformed/sanitize/entry_content_onchange.xml +9 -0
  2927. data/tests/wellformed/sanitize/entry_content_onclick.xml +9 -0
  2928. data/tests/wellformed/sanitize/entry_content_ondblclick.xml +9 -0
  2929. data/tests/wellformed/sanitize/entry_content_onerror.xml +9 -0
  2930. data/tests/wellformed/sanitize/entry_content_onfocus.xml +9 -0
  2931. data/tests/wellformed/sanitize/entry_content_onkeydown.xml +9 -0
  2932. data/tests/wellformed/sanitize/entry_content_onkeypress.xml +9 -0
  2933. data/tests/wellformed/sanitize/entry_content_onkeyup.xml +9 -0
  2934. data/tests/wellformed/sanitize/entry_content_onload.xml +9 -0
  2935. data/tests/wellformed/sanitize/entry_content_onmousedown.xml +9 -0
  2936. data/tests/wellformed/sanitize/entry_content_onmouseout.xml +9 -0
  2937. data/tests/wellformed/sanitize/entry_content_onmouseover.xml +9 -0
  2938. data/tests/wellformed/sanitize/entry_content_onmouseup.xml +9 -0
  2939. data/tests/wellformed/sanitize/entry_content_onreset.xml +9 -0
  2940. data/tests/wellformed/sanitize/entry_content_onresize.xml +9 -0
  2941. data/tests/wellformed/sanitize/entry_content_onsubmit.xml +9 -0
  2942. data/tests/wellformed/sanitize/entry_content_onunload.xml +9 -0
  2943. data/tests/wellformed/sanitize/entry_content_script.xml +9 -0
  2944. data/tests/wellformed/sanitize/entry_content_script_base64.xml +12 -0
  2945. data/tests/wellformed/sanitize/entry_content_script_cdata.xml +9 -0
  2946. data/tests/wellformed/sanitize/entry_content_script_inline.xml +9 -0
  2947. data/tests/wellformed/sanitize/entry_content_style.xml +9 -0
  2948. data/tests/wellformed/sanitize/entry_summary_applet.xml +9 -0
  2949. data/tests/wellformed/sanitize/entry_summary_blink.xml +9 -0
  2950. data/tests/wellformed/sanitize/entry_summary_crazy.xml +75 -0
  2951. data/tests/wellformed/sanitize/entry_summary_embed.xml +9 -0
  2952. data/tests/wellformed/sanitize/entry_summary_frame.xml +9 -0
  2953. data/tests/wellformed/sanitize/entry_summary_iframe.xml +9 -0
  2954. data/tests/wellformed/sanitize/entry_summary_link.xml +9 -0
  2955. data/tests/wellformed/sanitize/entry_summary_meta.xml +9 -0
  2956. data/tests/wellformed/sanitize/entry_summary_object.xml +9 -0
  2957. data/tests/wellformed/sanitize/entry_summary_onabort.xml +9 -0
  2958. data/tests/wellformed/sanitize/entry_summary_onblur.xml +9 -0
  2959. data/tests/wellformed/sanitize/entry_summary_onchange.xml +9 -0
  2960. data/tests/wellformed/sanitize/entry_summary_onclick.xml +9 -0
  2961. data/tests/wellformed/sanitize/entry_summary_ondblclick.xml +9 -0
  2962. data/tests/wellformed/sanitize/entry_summary_onerror.xml +9 -0
  2963. data/tests/wellformed/sanitize/entry_summary_onfocus.xml +9 -0
  2964. data/tests/wellformed/sanitize/entry_summary_onkeydown.xml +9 -0
  2965. data/tests/wellformed/sanitize/entry_summary_onkeypress.xml +9 -0
  2966. data/tests/wellformed/sanitize/entry_summary_onkeyup.xml +9 -0
  2967. data/tests/wellformed/sanitize/entry_summary_onload.xml +9 -0
  2968. data/tests/wellformed/sanitize/entry_summary_onmousedown.xml +9 -0
  2969. data/tests/wellformed/sanitize/entry_summary_onmouseout.xml +9 -0
  2970. data/tests/wellformed/sanitize/entry_summary_onmouseover.xml +9 -0
  2971. data/tests/wellformed/sanitize/entry_summary_onmouseup.xml +9 -0
  2972. data/tests/wellformed/sanitize/entry_summary_onreset.xml +9 -0
  2973. data/tests/wellformed/sanitize/entry_summary_onresize.xml +9 -0
  2974. data/tests/wellformed/sanitize/entry_summary_onsubmit.xml +9 -0
  2975. data/tests/wellformed/sanitize/entry_summary_onunload.xml +9 -0
  2976. data/tests/wellformed/sanitize/entry_summary_script.xml +9 -0
  2977. data/tests/wellformed/sanitize/entry_summary_script_base64.xml +12 -0
  2978. data/tests/wellformed/sanitize/entry_summary_script_cdata.xml +9 -0
  2979. data/tests/wellformed/sanitize/entry_summary_script_inline.xml +9 -0
  2980. data/tests/wellformed/sanitize/entry_summary_script_map_description.xml +9 -0
  2981. data/tests/wellformed/sanitize/entry_summary_style.xml +9 -0
  2982. data/tests/wellformed/sanitize/entry_title_applet.xml +9 -0
  2983. data/tests/wellformed/sanitize/entry_title_blink.xml +9 -0
  2984. data/tests/wellformed/sanitize/entry_title_crazy.xml +75 -0
  2985. data/tests/wellformed/sanitize/entry_title_embed.xml +9 -0
  2986. data/tests/wellformed/sanitize/entry_title_frame.xml +9 -0
  2987. data/tests/wellformed/sanitize/entry_title_iframe.xml +9 -0
  2988. data/tests/wellformed/sanitize/entry_title_link.xml +9 -0
  2989. data/tests/wellformed/sanitize/entry_title_meta.xml +9 -0
  2990. data/tests/wellformed/sanitize/entry_title_object.xml +9 -0
  2991. data/tests/wellformed/sanitize/entry_title_onabort.xml +9 -0
  2992. data/tests/wellformed/sanitize/entry_title_onblur.xml +9 -0
  2993. data/tests/wellformed/sanitize/entry_title_onchange.xml +9 -0
  2994. data/tests/wellformed/sanitize/entry_title_onclick.xml +9 -0
  2995. data/tests/wellformed/sanitize/entry_title_ondblclick.xml +9 -0
  2996. data/tests/wellformed/sanitize/entry_title_onerror.xml +9 -0
  2997. data/tests/wellformed/sanitize/entry_title_onfocus.xml +9 -0
  2998. data/tests/wellformed/sanitize/entry_title_onkeydown.xml +9 -0
  2999. data/tests/wellformed/sanitize/entry_title_onkeypress.xml +9 -0
  3000. data/tests/wellformed/sanitize/entry_title_onkeyup.xml +9 -0
  3001. data/tests/wellformed/sanitize/entry_title_onload.xml +9 -0
  3002. data/tests/wellformed/sanitize/entry_title_onmousedown.xml +9 -0
  3003. data/tests/wellformed/sanitize/entry_title_onmouseout.xml +9 -0
  3004. data/tests/wellformed/sanitize/entry_title_onmouseover.xml +9 -0
  3005. data/tests/wellformed/sanitize/entry_title_onmouseup.xml +9 -0
  3006. data/tests/wellformed/sanitize/entry_title_onreset.xml +9 -0
  3007. data/tests/wellformed/sanitize/entry_title_onresize.xml +9 -0
  3008. data/tests/wellformed/sanitize/entry_title_onsubmit.xml +9 -0
  3009. data/tests/wellformed/sanitize/entry_title_onunload.xml +9 -0
  3010. data/tests/wellformed/sanitize/entry_title_script.xml +9 -0
  3011. data/tests/wellformed/sanitize/entry_title_script_cdata.xml +9 -0
  3012. data/tests/wellformed/sanitize/entry_title_script_inline.xml +9 -0
  3013. data/tests/wellformed/sanitize/entry_title_style.xml +9 -0
  3014. data/tests/wellformed/sanitize/feed_copyright_applet.xml +7 -0
  3015. data/tests/wellformed/sanitize/feed_copyright_blink.xml +7 -0
  3016. data/tests/wellformed/sanitize/feed_copyright_crazy.xml +73 -0
  3017. data/tests/wellformed/sanitize/feed_copyright_embed.xml +7 -0
  3018. data/tests/wellformed/sanitize/feed_copyright_frame.xml +7 -0
  3019. data/tests/wellformed/sanitize/feed_copyright_iframe.xml +7 -0
  3020. data/tests/wellformed/sanitize/feed_copyright_link.xml +7 -0
  3021. data/tests/wellformed/sanitize/feed_copyright_meta.xml +7 -0
  3022. data/tests/wellformed/sanitize/feed_copyright_object.xml +7 -0
  3023. data/tests/wellformed/sanitize/feed_copyright_onabort.xml +7 -0
  3024. data/tests/wellformed/sanitize/feed_copyright_onblur.xml +7 -0
  3025. data/tests/wellformed/sanitize/feed_copyright_onchange.xml +7 -0
  3026. data/tests/wellformed/sanitize/feed_copyright_onclick.xml +7 -0
  3027. data/tests/wellformed/sanitize/feed_copyright_ondblclick.xml +7 -0
  3028. data/tests/wellformed/sanitize/feed_copyright_onerror.xml +7 -0
  3029. data/tests/wellformed/sanitize/feed_copyright_onfocus.xml +7 -0
  3030. data/tests/wellformed/sanitize/feed_copyright_onkeydown.xml +7 -0
  3031. data/tests/wellformed/sanitize/feed_copyright_onkeypress.xml +7 -0
  3032. data/tests/wellformed/sanitize/feed_copyright_onkeyup.xml +7 -0
  3033. data/tests/wellformed/sanitize/feed_copyright_onload.xml +7 -0
  3034. data/tests/wellformed/sanitize/feed_copyright_onmousedown.xml +7 -0
  3035. data/tests/wellformed/sanitize/feed_copyright_onmouseout.xml +7 -0
  3036. data/tests/wellformed/sanitize/feed_copyright_onmouseover.xml +7 -0
  3037. data/tests/wellformed/sanitize/feed_copyright_onmouseup.xml +7 -0
  3038. data/tests/wellformed/sanitize/feed_copyright_onreset.xml +7 -0
  3039. data/tests/wellformed/sanitize/feed_copyright_onresize.xml +7 -0
  3040. data/tests/wellformed/sanitize/feed_copyright_onsubmit.xml +7 -0
  3041. data/tests/wellformed/sanitize/feed_copyright_onunload.xml +7 -0
  3042. data/tests/wellformed/sanitize/feed_copyright_script.xml +7 -0
  3043. data/tests/wellformed/sanitize/feed_copyright_script_cdata.xml +7 -0
  3044. data/tests/wellformed/sanitize/feed_copyright_script_inline.xml +7 -0
  3045. data/tests/wellformed/sanitize/feed_copyright_style.xml +7 -0
  3046. data/tests/wellformed/sanitize/feed_info_applet.xml +7 -0
  3047. data/tests/wellformed/sanitize/feed_info_blink.xml +7 -0
  3048. data/tests/wellformed/sanitize/feed_info_crazy.xml +73 -0
  3049. data/tests/wellformed/sanitize/feed_info_embed.xml +7 -0
  3050. data/tests/wellformed/sanitize/feed_info_frame.xml +7 -0
  3051. data/tests/wellformed/sanitize/feed_info_iframe.xml +7 -0
  3052. data/tests/wellformed/sanitize/feed_info_link.xml +7 -0
  3053. data/tests/wellformed/sanitize/feed_info_meta.xml +7 -0
  3054. data/tests/wellformed/sanitize/feed_info_object.xml +7 -0
  3055. data/tests/wellformed/sanitize/feed_info_onabort.xml +7 -0
  3056. data/tests/wellformed/sanitize/feed_info_onblur.xml +7 -0
  3057. data/tests/wellformed/sanitize/feed_info_onchange.xml +7 -0
  3058. data/tests/wellformed/sanitize/feed_info_onclick.xml +7 -0
  3059. data/tests/wellformed/sanitize/feed_info_ondblclick.xml +7 -0
  3060. data/tests/wellformed/sanitize/feed_info_onerror.xml +7 -0
  3061. data/tests/wellformed/sanitize/feed_info_onfocus.xml +7 -0
  3062. data/tests/wellformed/sanitize/feed_info_onkeydown.xml +7 -0
  3063. data/tests/wellformed/sanitize/feed_info_onkeypress.xml +7 -0
  3064. data/tests/wellformed/sanitize/feed_info_onkeyup.xml +7 -0
  3065. data/tests/wellformed/sanitize/feed_info_onload.xml +7 -0
  3066. data/tests/wellformed/sanitize/feed_info_onmousedown.xml +7 -0
  3067. data/tests/wellformed/sanitize/feed_info_onmouseout.xml +7 -0
  3068. data/tests/wellformed/sanitize/feed_info_onmouseover.xml +7 -0
  3069. data/tests/wellformed/sanitize/feed_info_onmouseup.xml +7 -0
  3070. data/tests/wellformed/sanitize/feed_info_onreset.xml +7 -0
  3071. data/tests/wellformed/sanitize/feed_info_onresize.xml +7 -0
  3072. data/tests/wellformed/sanitize/feed_info_onsubmit.xml +7 -0
  3073. data/tests/wellformed/sanitize/feed_info_onunload.xml +7 -0
  3074. data/tests/wellformed/sanitize/feed_info_script.xml +7 -0
  3075. data/tests/wellformed/sanitize/feed_info_script_cdata.xml +7 -0
  3076. data/tests/wellformed/sanitize/feed_info_script_inline.xml +7 -0
  3077. data/tests/wellformed/sanitize/feed_info_style.xml +7 -0
  3078. data/tests/wellformed/sanitize/feed_subtitle_applet.xml +7 -0
  3079. data/tests/wellformed/sanitize/feed_subtitle_blink.xml +7 -0
  3080. data/tests/wellformed/sanitize/feed_subtitle_crazy.xml +73 -0
  3081. data/tests/wellformed/sanitize/feed_subtitle_embed.xml +7 -0
  3082. data/tests/wellformed/sanitize/feed_subtitle_frame.xml +7 -0
  3083. data/tests/wellformed/sanitize/feed_subtitle_iframe.xml +7 -0
  3084. data/tests/wellformed/sanitize/feed_subtitle_link.xml +7 -0
  3085. data/tests/wellformed/sanitize/feed_subtitle_meta.xml +7 -0
  3086. data/tests/wellformed/sanitize/feed_subtitle_object.xml +7 -0
  3087. data/tests/wellformed/sanitize/feed_subtitle_onabort.xml +7 -0
  3088. data/tests/wellformed/sanitize/feed_subtitle_onblur.xml +7 -0
  3089. data/tests/wellformed/sanitize/feed_subtitle_onchange.xml +7 -0
  3090. data/tests/wellformed/sanitize/feed_subtitle_onclick.xml +7 -0
  3091. data/tests/wellformed/sanitize/feed_subtitle_ondblclick.xml +7 -0
  3092. data/tests/wellformed/sanitize/feed_subtitle_onerror.xml +7 -0
  3093. data/tests/wellformed/sanitize/feed_subtitle_onfocus.xml +7 -0
  3094. data/tests/wellformed/sanitize/feed_subtitle_onkeydown.xml +7 -0
  3095. data/tests/wellformed/sanitize/feed_subtitle_onkeypress.xml +7 -0
  3096. data/tests/wellformed/sanitize/feed_subtitle_onkeyup.xml +7 -0
  3097. data/tests/wellformed/sanitize/feed_subtitle_onload.xml +7 -0
  3098. data/tests/wellformed/sanitize/feed_subtitle_onmousedown.xml +7 -0
  3099. data/tests/wellformed/sanitize/feed_subtitle_onmouseout.xml +7 -0
  3100. data/tests/wellformed/sanitize/feed_subtitle_onmouseover.xml +7 -0
  3101. data/tests/wellformed/sanitize/feed_subtitle_onmouseup.xml +7 -0
  3102. data/tests/wellformed/sanitize/feed_subtitle_onreset.xml +7 -0
  3103. data/tests/wellformed/sanitize/feed_subtitle_onresize.xml +7 -0
  3104. data/tests/wellformed/sanitize/feed_subtitle_onsubmit.xml +7 -0
  3105. data/tests/wellformed/sanitize/feed_subtitle_onunload.xml +7 -0
  3106. data/tests/wellformed/sanitize/feed_subtitle_script.xml +7 -0
  3107. data/tests/wellformed/sanitize/feed_subtitle_script_cdata.xml +7 -0
  3108. data/tests/wellformed/sanitize/feed_subtitle_script_inline.xml +7 -0
  3109. data/tests/wellformed/sanitize/feed_subtitle_style.xml +7 -0
  3110. data/tests/wellformed/sanitize/feed_tagline_applet.xml +7 -0
  3111. data/tests/wellformed/sanitize/feed_tagline_blink.xml +7 -0
  3112. data/tests/wellformed/sanitize/feed_tagline_crazy.xml +73 -0
  3113. data/tests/wellformed/sanitize/feed_tagline_embed.xml +7 -0
  3114. data/tests/wellformed/sanitize/feed_tagline_frame.xml +7 -0
  3115. data/tests/wellformed/sanitize/feed_tagline_iframe.xml +7 -0
  3116. data/tests/wellformed/sanitize/feed_tagline_link.xml +7 -0
  3117. data/tests/wellformed/sanitize/feed_tagline_meta.xml +7 -0
  3118. data/tests/wellformed/sanitize/feed_tagline_object.xml +7 -0
  3119. data/tests/wellformed/sanitize/feed_tagline_onabort.xml +7 -0
  3120. data/tests/wellformed/sanitize/feed_tagline_onblur.xml +7 -0
  3121. data/tests/wellformed/sanitize/feed_tagline_onchange.xml +7 -0
  3122. data/tests/wellformed/sanitize/feed_tagline_onclick.xml +7 -0
  3123. data/tests/wellformed/sanitize/feed_tagline_ondblclick.xml +7 -0
  3124. data/tests/wellformed/sanitize/feed_tagline_onerror.xml +7 -0
  3125. data/tests/wellformed/sanitize/feed_tagline_onfocus.xml +7 -0
  3126. data/tests/wellformed/sanitize/feed_tagline_onkeydown.xml +7 -0
  3127. data/tests/wellformed/sanitize/feed_tagline_onkeypress.xml +7 -0
  3128. data/tests/wellformed/sanitize/feed_tagline_onkeyup.xml +7 -0
  3129. data/tests/wellformed/sanitize/feed_tagline_onload.xml +7 -0
  3130. data/tests/wellformed/sanitize/feed_tagline_onmousedown.xml +7 -0
  3131. data/tests/wellformed/sanitize/feed_tagline_onmouseout.xml +7 -0
  3132. data/tests/wellformed/sanitize/feed_tagline_onmouseover.xml +7 -0
  3133. data/tests/wellformed/sanitize/feed_tagline_onmouseup.xml +7 -0
  3134. data/tests/wellformed/sanitize/feed_tagline_onreset.xml +7 -0
  3135. data/tests/wellformed/sanitize/feed_tagline_onresize.xml +7 -0
  3136. data/tests/wellformed/sanitize/feed_tagline_onsubmit.xml +7 -0
  3137. data/tests/wellformed/sanitize/feed_tagline_onunload.xml +7 -0
  3138. data/tests/wellformed/sanitize/feed_tagline_script.xml +7 -0
  3139. data/tests/wellformed/sanitize/feed_tagline_script_cdata.xml +7 -0
  3140. data/tests/wellformed/sanitize/feed_tagline_script_inline.xml +7 -0
  3141. data/tests/wellformed/sanitize/feed_tagline_script_map_description.xml +7 -0
  3142. data/tests/wellformed/sanitize/feed_tagline_style.xml +7 -0
  3143. data/tests/wellformed/sanitize/feed_title_applet.xml +7 -0
  3144. data/tests/wellformed/sanitize/feed_title_blink.xml +7 -0
  3145. data/tests/wellformed/sanitize/feed_title_crazy.xml +73 -0
  3146. data/tests/wellformed/sanitize/feed_title_embed.xml +7 -0
  3147. data/tests/wellformed/sanitize/feed_title_frame.xml +7 -0
  3148. data/tests/wellformed/sanitize/feed_title_iframe.xml +7 -0
  3149. data/tests/wellformed/sanitize/feed_title_link.xml +7 -0
  3150. data/tests/wellformed/sanitize/feed_title_meta.xml +7 -0
  3151. data/tests/wellformed/sanitize/feed_title_object.xml +7 -0
  3152. data/tests/wellformed/sanitize/feed_title_onabort.xml +7 -0
  3153. data/tests/wellformed/sanitize/feed_title_onblur.xml +7 -0
  3154. data/tests/wellformed/sanitize/feed_title_onchange.xml +7 -0
  3155. data/tests/wellformed/sanitize/feed_title_onclick.xml +7 -0
  3156. data/tests/wellformed/sanitize/feed_title_ondblclick.xml +7 -0
  3157. data/tests/wellformed/sanitize/feed_title_onerror.xml +7 -0
  3158. data/tests/wellformed/sanitize/feed_title_onfocus.xml +7 -0
  3159. data/tests/wellformed/sanitize/feed_title_onkeydown.xml +7 -0
  3160. data/tests/wellformed/sanitize/feed_title_onkeypress.xml +7 -0
  3161. data/tests/wellformed/sanitize/feed_title_onkeyup.xml +7 -0
  3162. data/tests/wellformed/sanitize/feed_title_onload.xml +7 -0
  3163. data/tests/wellformed/sanitize/feed_title_onmousedown.xml +7 -0
  3164. data/tests/wellformed/sanitize/feed_title_onmouseout.xml +7 -0
  3165. data/tests/wellformed/sanitize/feed_title_onmouseover.xml +7 -0
  3166. data/tests/wellformed/sanitize/feed_title_onmouseup.xml +7 -0
  3167. data/tests/wellformed/sanitize/feed_title_onreset.xml +7 -0
  3168. data/tests/wellformed/sanitize/feed_title_onresize.xml +7 -0
  3169. data/tests/wellformed/sanitize/feed_title_onsubmit.xml +7 -0
  3170. data/tests/wellformed/sanitize/feed_title_onunload.xml +7 -0
  3171. data/tests/wellformed/sanitize/feed_title_script.xml +7 -0
  3172. data/tests/wellformed/sanitize/feed_title_script_cdata.xml +7 -0
  3173. data/tests/wellformed/sanitize/feed_title_script_inline.xml +7 -0
  3174. data/tests/wellformed/sanitize/feed_title_style.xml +7 -0
  3175. data/tests/wellformed/sanitize/item_body_applet.xml +11 -0
  3176. data/tests/wellformed/sanitize/item_body_blink.xml +11 -0
  3177. data/tests/wellformed/sanitize/item_body_embed.xml +11 -0
  3178. data/tests/wellformed/sanitize/item_body_frame.xml +11 -0
  3179. data/tests/wellformed/sanitize/item_body_iframe.xml +11 -0
  3180. data/tests/wellformed/sanitize/item_body_link.xml +11 -0
  3181. data/tests/wellformed/sanitize/item_body_meta.xml +11 -0
  3182. data/tests/wellformed/sanitize/item_body_object.xml +11 -0
  3183. data/tests/wellformed/sanitize/item_body_onabort.xml +11 -0
  3184. data/tests/wellformed/sanitize/item_body_onblur.xml +11 -0
  3185. data/tests/wellformed/sanitize/item_body_onchange.xml +11 -0
  3186. data/tests/wellformed/sanitize/item_body_onclick.xml +11 -0
  3187. data/tests/wellformed/sanitize/item_body_ondblclick.xml +11 -0
  3188. data/tests/wellformed/sanitize/item_body_onerror.xml +11 -0
  3189. data/tests/wellformed/sanitize/item_body_onfocus.xml +11 -0
  3190. data/tests/wellformed/sanitize/item_body_onkeydown.xml +11 -0
  3191. data/tests/wellformed/sanitize/item_body_onkeypress.xml +11 -0
  3192. data/tests/wellformed/sanitize/item_body_onkeyup.xml +11 -0
  3193. data/tests/wellformed/sanitize/item_body_onload.xml +11 -0
  3194. data/tests/wellformed/sanitize/item_body_onmousedown.xml +11 -0
  3195. data/tests/wellformed/sanitize/item_body_onmouseout.xml +11 -0
  3196. data/tests/wellformed/sanitize/item_body_onmouseover.xml +11 -0
  3197. data/tests/wellformed/sanitize/item_body_onmouseup.xml +11 -0
  3198. data/tests/wellformed/sanitize/item_body_onreset.xml +11 -0
  3199. data/tests/wellformed/sanitize/item_body_onresize.xml +11 -0
  3200. data/tests/wellformed/sanitize/item_body_onsubmit.xml +11 -0
  3201. data/tests/wellformed/sanitize/item_body_onunload.xml +11 -0
  3202. data/tests/wellformed/sanitize/item_body_script.xml +11 -0
  3203. data/tests/wellformed/sanitize/item_body_script_map_content.xml +11 -0
  3204. data/tests/wellformed/sanitize/item_body_style.xml +11 -0
  3205. data/tests/wellformed/sanitize/item_content_encoded_applet.xml +11 -0
  3206. data/tests/wellformed/sanitize/item_content_encoded_blink.xml +11 -0
  3207. data/tests/wellformed/sanitize/item_content_encoded_crazy.xml +77 -0
  3208. data/tests/wellformed/sanitize/item_content_encoded_embed.xml +11 -0
  3209. data/tests/wellformed/sanitize/item_content_encoded_frame.xml +11 -0
  3210. data/tests/wellformed/sanitize/item_content_encoded_iframe.xml +11 -0
  3211. data/tests/wellformed/sanitize/item_content_encoded_link.xml +11 -0
  3212. data/tests/wellformed/sanitize/item_content_encoded_map_content.xml +11 -0
  3213. data/tests/wellformed/sanitize/item_content_encoded_meta.xml +11 -0
  3214. data/tests/wellformed/sanitize/item_content_encoded_object.xml +11 -0
  3215. data/tests/wellformed/sanitize/item_content_encoded_onabort.xml +11 -0
  3216. data/tests/wellformed/sanitize/item_content_encoded_onblur.xml +11 -0
  3217. data/tests/wellformed/sanitize/item_content_encoded_onchange.xml +11 -0
  3218. data/tests/wellformed/sanitize/item_content_encoded_onclick.xml +11 -0
  3219. data/tests/wellformed/sanitize/item_content_encoded_ondblclick.xml +11 -0
  3220. data/tests/wellformed/sanitize/item_content_encoded_onerror.xml +11 -0
  3221. data/tests/wellformed/sanitize/item_content_encoded_onfocus.xml +11 -0
  3222. data/tests/wellformed/sanitize/item_content_encoded_onkeydown.xml +11 -0
  3223. data/tests/wellformed/sanitize/item_content_encoded_onkeypress.xml +11 -0
  3224. data/tests/wellformed/sanitize/item_content_encoded_onkeyup.xml +11 -0
  3225. data/tests/wellformed/sanitize/item_content_encoded_onload.xml +11 -0
  3226. data/tests/wellformed/sanitize/item_content_encoded_onmousedown.xml +11 -0
  3227. data/tests/wellformed/sanitize/item_content_encoded_onmouseout.xml +11 -0
  3228. data/tests/wellformed/sanitize/item_content_encoded_onmouseover.xml +11 -0
  3229. data/tests/wellformed/sanitize/item_content_encoded_onmouseup.xml +11 -0
  3230. data/tests/wellformed/sanitize/item_content_encoded_onreset.xml +11 -0
  3231. data/tests/wellformed/sanitize/item_content_encoded_onresize.xml +11 -0
  3232. data/tests/wellformed/sanitize/item_content_encoded_onsubmit.xml +11 -0
  3233. data/tests/wellformed/sanitize/item_content_encoded_onunload.xml +11 -0
  3234. data/tests/wellformed/sanitize/item_content_encoded_script.xml +11 -0
  3235. data/tests/wellformed/sanitize/item_content_encoded_script_cdata.xml +11 -0
  3236. data/tests/wellformed/sanitize/item_content_encoded_script_map_content.xml +11 -0
  3237. data/tests/wellformed/sanitize/item_content_encoded_style.xml +11 -0
  3238. data/tests/wellformed/sanitize/item_description_applet.xml +11 -0
  3239. data/tests/wellformed/sanitize/item_description_blink.xml +11 -0
  3240. data/tests/wellformed/sanitize/item_description_crazy.xml +81 -0
  3241. data/tests/wellformed/sanitize/item_description_embed.xml +11 -0
  3242. data/tests/wellformed/sanitize/item_description_frame.xml +11 -0
  3243. data/tests/wellformed/sanitize/item_description_iframe.xml +11 -0
  3244. data/tests/wellformed/sanitize/item_description_link.xml +11 -0
  3245. data/tests/wellformed/sanitize/item_description_meta.xml +11 -0
  3246. data/tests/wellformed/sanitize/item_description_object.xml +11 -0
  3247. data/tests/wellformed/sanitize/item_description_onabort.xml +11 -0
  3248. data/tests/wellformed/sanitize/item_description_onblur.xml +11 -0
  3249. data/tests/wellformed/sanitize/item_description_onchange.xml +11 -0
  3250. data/tests/wellformed/sanitize/item_description_onclick.xml +11 -0
  3251. data/tests/wellformed/sanitize/item_description_ondblclick.xml +11 -0
  3252. data/tests/wellformed/sanitize/item_description_onerror.xml +11 -0
  3253. data/tests/wellformed/sanitize/item_description_onfocus.xml +11 -0
  3254. data/tests/wellformed/sanitize/item_description_onkeydown.xml +11 -0
  3255. data/tests/wellformed/sanitize/item_description_onkeypress.xml +11 -0
  3256. data/tests/wellformed/sanitize/item_description_onkeyup.xml +11 -0
  3257. data/tests/wellformed/sanitize/item_description_onload.xml +11 -0
  3258. data/tests/wellformed/sanitize/item_description_onmousedown.xml +11 -0
  3259. data/tests/wellformed/sanitize/item_description_onmouseout.xml +11 -0
  3260. data/tests/wellformed/sanitize/item_description_onmouseover.xml +11 -0
  3261. data/tests/wellformed/sanitize/item_description_onmouseup.xml +11 -0
  3262. data/tests/wellformed/sanitize/item_description_onreset.xml +11 -0
  3263. data/tests/wellformed/sanitize/item_description_onresize.xml +11 -0
  3264. data/tests/wellformed/sanitize/item_description_onsubmit.xml +11 -0
  3265. data/tests/wellformed/sanitize/item_description_onunload.xml +11 -0
  3266. data/tests/wellformed/sanitize/item_description_script.xml +11 -0
  3267. data/tests/wellformed/sanitize/item_description_script_cdata.xml +11 -0
  3268. data/tests/wellformed/sanitize/item_description_script_map_summary.xml +11 -0
  3269. data/tests/wellformed/sanitize/item_description_style.xml +11 -0
  3270. data/tests/wellformed/sanitize/item_fullitem_applet.xml +11 -0
  3271. data/tests/wellformed/sanitize/item_fullitem_blink.xml +11 -0
  3272. data/tests/wellformed/sanitize/item_fullitem_crazy.xml +77 -0
  3273. data/tests/wellformed/sanitize/item_fullitem_embed.xml +11 -0
  3274. data/tests/wellformed/sanitize/item_fullitem_frame.xml +11 -0
  3275. data/tests/wellformed/sanitize/item_fullitem_iframe.xml +11 -0
  3276. data/tests/wellformed/sanitize/item_fullitem_link.xml +11 -0
  3277. data/tests/wellformed/sanitize/item_fullitem_meta.xml +11 -0
  3278. data/tests/wellformed/sanitize/item_fullitem_object.xml +11 -0
  3279. data/tests/wellformed/sanitize/item_fullitem_onabort.xml +11 -0
  3280. data/tests/wellformed/sanitize/item_fullitem_onblur.xml +11 -0
  3281. data/tests/wellformed/sanitize/item_fullitem_onchange.xml +11 -0
  3282. data/tests/wellformed/sanitize/item_fullitem_onclick.xml +11 -0
  3283. data/tests/wellformed/sanitize/item_fullitem_ondblclick.xml +11 -0
  3284. data/tests/wellformed/sanitize/item_fullitem_onerror.xml +11 -0
  3285. data/tests/wellformed/sanitize/item_fullitem_onfocus.xml +11 -0
  3286. data/tests/wellformed/sanitize/item_fullitem_onkeydown.xml +11 -0
  3287. data/tests/wellformed/sanitize/item_fullitem_onkeypress.xml +11 -0
  3288. data/tests/wellformed/sanitize/item_fullitem_onkeyup.xml +11 -0
  3289. data/tests/wellformed/sanitize/item_fullitem_onload.xml +11 -0
  3290. data/tests/wellformed/sanitize/item_fullitem_onmousedown.xml +11 -0
  3291. data/tests/wellformed/sanitize/item_fullitem_onmouseout.xml +11 -0
  3292. data/tests/wellformed/sanitize/item_fullitem_onmouseover.xml +11 -0
  3293. data/tests/wellformed/sanitize/item_fullitem_onmouseup.xml +11 -0
  3294. data/tests/wellformed/sanitize/item_fullitem_onreset.xml +11 -0
  3295. data/tests/wellformed/sanitize/item_fullitem_onresize.xml +11 -0
  3296. data/tests/wellformed/sanitize/item_fullitem_onsubmit.xml +11 -0
  3297. data/tests/wellformed/sanitize/item_fullitem_onunload.xml +11 -0
  3298. data/tests/wellformed/sanitize/item_fullitem_script.xml +11 -0
  3299. data/tests/wellformed/sanitize/item_fullitem_script_cdata.xml +11 -0
  3300. data/tests/wellformed/sanitize/item_fullitem_script_map_summary.xml +11 -0
  3301. data/tests/wellformed/sanitize/item_fullitem_style.xml +11 -0
  3302. data/tests/wellformed/sanitize/item_xhtml_body_applet.xml +11 -0
  3303. data/tests/wellformed/sanitize/item_xhtml_body_blink.xml +11 -0
  3304. data/tests/wellformed/sanitize/item_xhtml_body_embed.xml +11 -0
  3305. data/tests/wellformed/sanitize/item_xhtml_body_frame.xml +11 -0
  3306. data/tests/wellformed/sanitize/item_xhtml_body_iframe.xml +11 -0
  3307. data/tests/wellformed/sanitize/item_xhtml_body_link.xml +11 -0
  3308. data/tests/wellformed/sanitize/item_xhtml_body_meta.xml +11 -0
  3309. data/tests/wellformed/sanitize/item_xhtml_body_object.xml +11 -0
  3310. data/tests/wellformed/sanitize/item_xhtml_body_onabort.xml +11 -0
  3311. data/tests/wellformed/sanitize/item_xhtml_body_onblur.xml +11 -0
  3312. data/tests/wellformed/sanitize/item_xhtml_body_onchange.xml +11 -0
  3313. data/tests/wellformed/sanitize/item_xhtml_body_onclick.xml +11 -0
  3314. data/tests/wellformed/sanitize/item_xhtml_body_ondblclick.xml +11 -0
  3315. data/tests/wellformed/sanitize/item_xhtml_body_onerror.xml +11 -0
  3316. data/tests/wellformed/sanitize/item_xhtml_body_onfocus.xml +11 -0
  3317. data/tests/wellformed/sanitize/item_xhtml_body_onkeydown.xml +11 -0
  3318. data/tests/wellformed/sanitize/item_xhtml_body_onkeypress.xml +11 -0
  3319. data/tests/wellformed/sanitize/item_xhtml_body_onkeyup.xml +11 -0
  3320. data/tests/wellformed/sanitize/item_xhtml_body_onload.xml +11 -0
  3321. data/tests/wellformed/sanitize/item_xhtml_body_onmousedown.xml +11 -0
  3322. data/tests/wellformed/sanitize/item_xhtml_body_onmouseout.xml +11 -0
  3323. data/tests/wellformed/sanitize/item_xhtml_body_onmouseover.xml +11 -0
  3324. data/tests/wellformed/sanitize/item_xhtml_body_onmouseup.xml +11 -0
  3325. data/tests/wellformed/sanitize/item_xhtml_body_onreset.xml +11 -0
  3326. data/tests/wellformed/sanitize/item_xhtml_body_onresize.xml +11 -0
  3327. data/tests/wellformed/sanitize/item_xhtml_body_onsubmit.xml +11 -0
  3328. data/tests/wellformed/sanitize/item_xhtml_body_onunload.xml +11 -0
  3329. data/tests/wellformed/sanitize/item_xhtml_body_script.xml +11 -0
  3330. data/tests/wellformed/sanitize/item_xhtml_body_script_map_content.xml +11 -0
  3331. data/tests/wellformed/sanitize/item_xhtml_body_style.xml +11 -0
  3332. metadata +3472 -0
data/LICENSE ADDED
@@ -0,0 +1,68 @@
1
+ I include this license in good faith effort, and it should be considered the license for the code herein.
2
+ - Jeff Hodges < jeff at somethingsimilar.com >
3
+ --
4
+ Universal Feed Parser (feedparser.py), its testing harness (feedparsertest.py),
5
+ and its unit tests (everything in the tests/ directory) are released under the
6
+ following license:
7
+
8
+ ----- begin license block -----
9
+
10
+ Copyright (c) 2002-2005, Mark Pilgrim
11
+ All rights reserved.
12
+
13
+ Redistribution and use in source and binary forms, with or without modification,
14
+ are permitted provided that the following conditions are met:
15
+
16
+ * Redistributions of source code must retain the above copyright notice,
17
+ this list of conditions and the following disclaimer.
18
+ * Redistributions in binary form must reproduce the above copyright notice,
19
+ this list of conditions and the following disclaimer in the documentation
20
+ and/or other materials provided with the distribution.
21
+
22
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
23
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32
+ POSSIBILITY OF SUCH DAMAGE.
33
+
34
+ ----- end license block -----
35
+
36
+
37
+
38
+
39
+
40
+ Universal Feed Parser documentation (everything in the docs/ directory) is
41
+ released under the following license:
42
+
43
+ ----- begin license block -----
44
+
45
+ Copyright 2004-2005 Mark Pilgrim. All rights reserved.
46
+
47
+ Redistribution and use in source (XML DocBook) and "compiled" forms (SGML,
48
+ HTML, PDF, PostScript, RTF and so forth) with or without modification, are
49
+ permitted provided that the following conditions are met:
50
+
51
+ * Redistributions of source code (XML DocBook) must retain the above copyright
52
+ notice, this list of conditions and the following disclaimer.
53
+ * Redistributions in compiled form (transformed to other DTDs, converted to
54
+ PDF, PostScript, RTF and other formats) must reproduce the above copyright
55
+ notice, this list of conditions and the following disclaimer in the
56
+ documentation and/or other materials provided with the distribution.
57
+
58
+ THIS DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
59
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
62
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
63
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
64
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
65
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
66
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
67
+ ARISING IN ANY WAY OUT OF THE USE OF THIS DOCUMENTATION, EVEN IF ADVISED OF THE
68
+ POSSIBILITY OF SUCH DAMAGE.
data/README ADDED
@@ -0,0 +1,28 @@
1
+ Universal Feed Parser
2
+ Parse RSS and Atom feeds in Python. 3000 unit tests. Open source.
3
+
4
+ Copyright (c) 2002-5 by Mark Pilgrim
5
+ open source, see LICENSE file for details
6
+
7
+ -----
8
+
9
+ To use:
10
+ If installed as a gem
11
+ require 'rubygems'
12
+ gem 'rfeedparser'
13
+ require 'feedparser'
14
+
15
+ fp = FeedParser.parse("some-feed-filepath-or-url")
16
+
17
+ If not installed as a gem, copy the contents of lib into the ruby path and just use
18
+ require 'feedparser'
19
+
20
+ fp = FeedParser.parse("some-feed-filepath-or-url")
21
+
22
+ ----
23
+
24
+ For developers:
25
+ I currently have the "rough" code in a bzr branch over at
26
+ <http://somethingsimilar.com/code/bzr/rfeedparser/>. You'll want
27
+ to check out the rfeedparser-main branch for the current code, or
28
+ rfeedparser-release for the code in the latest release.
@@ -0,0 +1,60 @@
1
+ === Testing rFeedParser ===
2
+ Simply run `ruby feedparsertest.rb` to run all of the FeedParser tests.
3
+ Optionally, you can start up feedparserserver.rb and run feedparser.rb
4
+ against "http://localhost:8097/tests/path/to/testcase.xml" if you want
5
+ to try a test individually. I'll probably merge feedparserserver.rb into
6
+ feedparsertest.rb soon.
7
+
8
+ === Last Count 20070321 ===
9
+ By my last count, feedparsertest.rb says that there are 45 assertions
10
+ that fail, and 4 that error out. I've included here a few tests that
11
+ "Failed, Sort Of". By that I mean, the behaviors the tests are meant to
12
+ check are correct, but the test fails because of some other superficial
13
+ or unrelated behavior.
14
+
15
+ === Tests Failed, Sort Of ===
16
+
17
+ Problem:
18
+ Hpricot adds end tags when it sees an unclosed tag. This means that
19
+ certain tests that rely on feedparser.py's _HTMLSanitizer not closing
20
+ tags will fail. Many of the tests affected (actually, all the ones
21
+ affected, AFAICT) would otherwise have passed.
22
+
23
+ Tests Affected:
24
+ * tests/wellformed/rss/item_description_not_a_doctype.xml (extraneous trailing </a>)
25
+ * tests/illformed/rss/item_description_not_a_doctype.xml (ditto)
26
+ ==
27
+ Problem:
28
+ The Hpricot#scrub method I've written does not remove the dangerous
29
+ markup in the same way feedparser.py does, but the output is still safe.
30
+
31
+ Tests Affected:
32
+ * tests/wellformed/sanitize/entry_content_crazy.xml
33
+ * tests/wellformed/sanitize/entry_summary_crazy.xml
34
+ * tests/wellformed/sanitize/entry_title_crazy.xml
35
+ * tests/wellformed/sanitize/feed_copyright_crazy.xml
36
+ * tests/wellformed/sanitize/feed_info_crazy.xml
37
+ * tests/wellformed/sanitize/feed_subtitle_crazy.xml
38
+ * tests/wellformed/sanitize/feed_tagline_crazy.xml
39
+ * tests/wellformed/sanitize/feed_title_crazy.xml
40
+ * tests/wellformed/sanitize/item_content_encoded_crazy.xml
41
+ * tests/wellformed/sanitize/item_description_crazy.xml
42
+ * tests/wellformed/sanitize/item_fullitem_crazy.xml
43
+ * tests/illformed/sanitize/entry_content_crazy.xml
44
+ * tests/illformed/sanitize/entry_summary_crazy.xml
45
+ * tests/illformed/sanitize/entry_title_crazy.xml
46
+ * tests/illformed/sanitize/feed_copyright_crazy.xml
47
+ * tests/illformed/sanitize/feed_info_crazy.xml
48
+ * tests/illformed/sanitize/feed_subtitle_crazy.xml
49
+ * tests/illformed/sanitize/feed_tagline_crazy.xml
50
+ * tests/illformed/sanitize/feed_title_crazy.xml
51
+ * tests/illformed/sanitize/item_content_encoded_crazy.xml
52
+ * tests/illformed/sanitize/item_description_crazy.xml
53
+ * tests/illformed/sanitize/item_fullitem_crazy.xml
54
+ ==
55
+
56
+ Problem:
57
+ My current system lacks a few encodings that rfeedparser and Iconv need.
58
+ This results in failures that will probably not occur on other machines.
59
+
60
+ Tests Affected:
@@ -0,0 +1,3671 @@
1
+ #!/usr/bin/env ruby
2
+ """Universal feed parser in Ruby
3
+
4
+ Handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds
5
+
6
+ Visit http://feedparser.org/ for the latest version in Python
7
+ Visit http://feedparser.org/docs/ for the latest documentation
8
+ Email Jeff Hodges at jeff@obquo.com for questions
9
+
10
+ Required: Ruby 1.8
11
+ """
12
+ $KCODE = 'UTF8'
13
+ require 'stringio'
14
+ require 'uri'
15
+ require 'cgi' # escaping html
16
+ require 'time'
17
+ require 'xml/saxdriver' # calling expat
18
+ require 'pp'
19
+ require 'rubygems'
20
+ require 'base64'
21
+ require 'iconv'
22
+ begin
23
+ gem 'hpricot', ">=0.5"
24
+ gem 'character-encodings', ">=0.2.0"
25
+ gem 'htmltools'
26
+ gem 'htmlentities'
27
+ gem 'activesupport'
28
+ gem 'rchardet'
29
+ rescue Gem::LoadError,LoadError
30
+ end
31
+
32
+ require 'chardet'
33
+ $chardet = true
34
+
35
+ require 'hpricot'
36
+ require 'encoding/character/utf-8'
37
+ require 'html/sgml-parser'
38
+ require 'htmlentities'
39
+ require 'active_support'
40
+ require 'open-uri'
41
+ include OpenURI
42
+
43
+ $debug = false
44
+ $compatible = true
45
+
46
# Maps encoding aliases (keys; Python-codec-style names, mostly lowercase with
# underscores) to the encoding name that is handed to Iconv (values).
# Adapted from python2.4's encodings/aliases.py.
#
# Lookups are exact-match: uconvert falls back to the name it was given when
# no entry exists here.
Encoding_Aliases = {
  # ascii codec
  '646' => 'ascii',
  'ansi_x3.4_1968' => 'ascii',
  'ansi_x3_4_1968' => 'ascii', # some email headers use this non-standard name
  'ansi_x3.4_1986' => 'ascii',
  'cp367' => 'ascii',
  'csascii' => 'ascii',
  'ibm367' => 'ascii',
  'iso646_us' => 'ascii',
  'iso_646.irv_1991' => 'ascii',
  'iso_ir_6' => 'ascii',
  'us' => 'ascii',
  'us_ascii' => 'ascii',

  # big5 codec
  'big5_tw' => 'big5',
  'csbig5' => 'big5',

  # big5hkscs codec
  'big5_hkscs' => 'big5hkscs',
  'hkscs' => 'big5hkscs',

  # cp037 codec
  '037' => 'cp037',
  'csibm037' => 'cp037',
  'ebcdic_cp_ca' => 'cp037',
  'ebcdic_cp_nl' => 'cp037',
  'ebcdic_cp_us' => 'cp037',
  'ebcdic_cp_wt' => 'cp037',
  'ibm037' => 'cp037',
  'ibm039' => 'cp037',

  # cp1026 codec
  '1026' => 'cp1026',
  'csibm1026' => 'cp1026',
  'ibm1026' => 'cp1026',

  # cp1140 codec
  '1140' => 'cp1140',
  'ibm1140' => 'cp1140',

  # cp1250 codec
  '1250' => 'cp1250',
  'windows_1250' => 'cp1250',

  # cp1251 codec
  '1251' => 'cp1251',
  'windows_1251' => 'cp1251',

  # cp1252 codec
  '1252' => 'cp1252',
  'windows_1252' => 'cp1252',

  # cp1253 codec
  '1253' => 'cp1253',
  'windows_1253' => 'cp1253',

  # cp1254 codec
  '1254' => 'cp1254',
  'windows_1254' => 'cp1254',

  # cp1255 codec
  '1255' => 'cp1255',
  'windows_1255' => 'cp1255',

  # cp1256 codec
  '1256' => 'cp1256',
  'windows_1256' => 'cp1256',

  # cp1257 codec
  '1257' => 'cp1257',
  'windows_1257' => 'cp1257',

  # cp1258 codec
  '1258' => 'cp1258',
  'windows_1258' => 'cp1258',

  # cp424 codec
  '424' => 'cp424',
  'csibm424' => 'cp424',
  'ebcdic_cp_he' => 'cp424',
  'ibm424' => 'cp424',

  # cp437 codec
  '437' => 'cp437',
  'cspc8codepage437' => 'cp437',
  'ibm437' => 'cp437',

  # cp500 codec
  '500' => 'cp500',
  'csibm500' => 'cp500',
  'ebcdic_cp_be' => 'cp500',
  'ebcdic_cp_ch' => 'cp500',
  'ibm500' => 'cp500',

  # cp775 codec
  '775' => 'cp775',
  'cspc775baltic' => 'cp775',
  'ibm775' => 'cp775',

  # cp850 codec
  '850' => 'cp850',
  'cspc850multilingual' => 'cp850',
  'ibm850' => 'cp850',

  # cp852 codec
  '852' => 'cp852',
  'cspcp852' => 'cp852',
  'ibm852' => 'cp852',

  # cp855 codec
  '855' => 'cp855',
  'csibm855' => 'cp855',
  'ibm855' => 'cp855',

  # cp857 codec
  '857' => 'cp857',
  'csibm857' => 'cp857',
  'ibm857' => 'cp857',

  # cp860 codec
  '860' => 'cp860',
  'csibm860' => 'cp860',
  'ibm860' => 'cp860',

  # cp861 codec
  '861' => 'cp861',
  'cp_is' => 'cp861',
  'csibm861' => 'cp861',
  'ibm861' => 'cp861',

  # cp862 codec
  '862' => 'cp862',
  'cspc862latinhebrew' => 'cp862',
  'ibm862' => 'cp862',

  # cp863 codec
  '863' => 'cp863',
  'csibm863' => 'cp863',
  'ibm863' => 'cp863',

  # cp864 codec
  '864' => 'cp864',
  'csibm864' => 'cp864',
  'ibm864' => 'cp864',

  # cp865 codec
  '865' => 'cp865',
  'csibm865' => 'cp865',
  'ibm865' => 'cp865',

  # cp866 codec
  '866' => 'cp866',
  'csibm866' => 'cp866',
  'ibm866' => 'cp866',

  # cp869 codec
  '869' => 'cp869',
  'cp_gr' => 'cp869',
  'csibm869' => 'cp869',
  'ibm869' => 'cp869',

  # cp932 codec
  '932' => 'cp932',
  'ms932' => 'cp932',
  'mskanji' => 'cp932',
  'ms_kanji' => 'cp932',

  # cp949 codec
  '949' => 'cp949',
  'ms949' => 'cp949',
  'uhc' => 'cp949',

  # cp950 codec
  '950' => 'cp950',
  'ms950' => 'cp950',

  # euc_jp codec
  'euc_jp' => 'euc-jp',
  'eucjp' => 'euc-jp',
  'ujis' => 'euc-jp',
  'u_jis' => 'euc-jp',

  # euc_kr codec
  'euc_kr' => 'euc-kr',
  'euckr' => 'euc-kr',
  'korean' => 'euc-kr',
  'ksc5601' => 'euc-kr',
  'ks_c_5601' => 'euc-kr',
  'ks_c_5601_1987' => 'euc-kr',
  'ksx1001' => 'euc-kr',
  'ks_x_1001' => 'euc-kr',

  # gb18030 codec
  'gb18030_2000' => 'gb18030',

  # gb2312 codec
  'chinese' => 'gb2312',
  'csiso58gb231280' => 'gb2312',
  'euc_cn' => 'gb2312',
  'euccn' => 'gb2312',
  'eucgb2312_cn' => 'gb2312',
  'gb2312_1980' => 'gb2312',
  'gb2312_80' => 'gb2312',
  'iso_ir_58' => 'gb2312',

  # gbk codec
  '936' => 'gbk',
  'cp936' => 'gbk',
  'ms936' => 'gbk',

  # hp-roman8 codec
  'hp_roman8' => 'hp-roman8',
  'roman8' => 'hp-roman8',
  'r8' => 'hp-roman8',
  'csHPRoman8' => 'hp-roman8',

  # iso2022_jp codec
  'iso2022_jp' => 'iso-2022-jp',
  'csiso2022jp' => 'iso-2022-jp',
  'iso2022jp' => 'iso-2022-jp',
  'iso_2022_jp' => 'iso-2022-jp',

  # iso2022_jp_1 codec
  'iso2022_jp_1' => 'iso-2022-jp-1', # key was misspelled 'iso2002_jp_1'
  'iso2022jp_1' => 'iso-2022-jp-1',
  'iso_2022_jp_1' => 'iso-2022-jp-1',

  # iso2022_jp_2 codec
  'iso2022_jp_2' => 'iso-2022-jp-2', # value was misspelled 'iso-2002-jp-2'
  'iso2022jp_2' => 'iso-2022-jp-2',
  'iso_2022_jp_2' => 'iso-2022-jp-2',

  # iso2022_jp_3 codec
  'iso2022_jp_3' => 'iso-2022-jp-3', # key was misspelled 'iso2002_jp_3'
  'iso2022jp_3' => 'iso-2022-jp-3',
  'iso_2022_jp_3' => 'iso-2022-jp-3',

  # iso2022_kr codec
  'iso2022_kr' => 'iso-2022-kr',
  'csiso2022kr' => 'iso-2022-kr',
  'iso2022kr' => 'iso-2022-kr',
  'iso_2022_kr' => 'iso-2022-kr',

  # iso8859_10 codec
  'iso8859_10' => 'iso-8859-10',
  'csisolatin6' => 'iso-8859-10',
  'iso_8859_10' => 'iso-8859-10',
  'iso_8859_10_1992' => 'iso-8859-10',
  'iso_ir_157' => 'iso-8859-10',
  'l6' => 'iso-8859-10',
  'latin6' => 'iso-8859-10',

  # iso8859_13 codec
  'iso8859_13' => 'iso-8859-13',
  'iso_8859_13' => 'iso-8859-13',

  # iso8859_14 codec
  'iso8859_14' => 'iso-8859-14',
  'iso_8859_14' => 'iso-8859-14',
  'iso_8859_14_1998' => 'iso-8859-14',
  'iso_celtic' => 'iso-8859-14',
  'iso_ir_199' => 'iso-8859-14',
  'l8' => 'iso-8859-14',
  'latin8' => 'iso-8859-14',

  # iso8859_15 codec
  'iso8859_15' => 'iso-8859-15',
  'iso_8859_15' => 'iso-8859-15',

  # iso8859_1 codec
  'latin_1' => 'iso-8859-1',
  'cp819' => 'iso-8859-1',
  'csisolatin1' => 'iso-8859-1',
  'ibm819' => 'iso-8859-1',
  'iso8859' => 'iso-8859-1',
  'iso_8859_1' => 'iso-8859-1',
  'iso_8859_1_1987' => 'iso-8859-1',
  'iso_ir_100' => 'iso-8859-1',
  'l1' => 'iso-8859-1',
  'latin' => 'iso-8859-1',
  'latin1' => 'iso-8859-1',

  # iso8859_2 codec
  'iso8859_2' => 'iso-8859-2',
  'csisolatin2' => 'iso-8859-2',
  'iso_8859_2' => 'iso-8859-2',
  'iso_8859_2_1987' => 'iso-8859-2',
  'iso_ir_101' => 'iso-8859-2',
  'l2' => 'iso-8859-2',
  'latin2' => 'iso-8859-2',

  # iso8859_3 codec
  'iso8859_3' => 'iso-8859-3',
  'csisolatin3' => 'iso-8859-3',
  'iso_8859_3' => 'iso-8859-3',
  'iso_8859_3_1988' => 'iso-8859-3',
  'iso_ir_109' => 'iso-8859-3',
  'l3' => 'iso-8859-3',
  'latin3' => 'iso-8859-3',

  # iso8859_4 codec
  'iso8859_4' => 'iso-8859-4', # key was misspelled 'iso8849_4'
  'csisolatin4' => 'iso-8859-4',
  'iso_8859_4' => 'iso-8859-4',
  'iso_8859_4_1988' => 'iso-8859-4',
  'iso_ir_110' => 'iso-8859-4',
  'l4' => 'iso-8859-4',
  'latin4' => 'iso-8859-4',

  # iso8859_5 codec
  'iso8859_5' => 'iso-8859-5',
  'csisolatincyrillic' => 'iso-8859-5',
  'cyrillic' => 'iso-8859-5',
  'iso_8859_5' => 'iso-8859-5',
  'iso_8859_5_1988' => 'iso-8859-5',
  'iso_ir_144' => 'iso-8859-5',

  # iso8859_6 codec
  'iso8859_6' => 'iso-8859-6',
  'arabic' => 'iso-8859-6',
  'asmo_708' => 'iso-8859-6',
  'csisolatinarabic' => 'iso-8859-6',
  'ecma_114' => 'iso-8859-6',
  'iso_8859_6' => 'iso-8859-6',
  'iso_8859_6_1987' => 'iso-8859-6',
  'iso_ir_127' => 'iso-8859-6',

  # iso8859_7 codec
  'iso8859_7' => 'iso-8859-7',
  'csisolatingreek' => 'iso-8859-7',
  'ecma_118' => 'iso-8859-7',
  'elot_928' => 'iso-8859-7',
  'greek' => 'iso-8859-7',
  'greek8' => 'iso-8859-7',
  'iso_8859_7' => 'iso-8859-7',
  'iso_8859_7_1987' => 'iso-8859-7',
  'iso_ir_126' => 'iso-8859-7',

  # iso8859_8 codec
  # (this entry used to read 'iso8859_9' => 'iso8859_8', which both left
  # 'iso8859_8' unmapped and duplicated the iso8859_9 key defined below)
  'iso8859_8' => 'iso-8859-8',
  'csisolatinhebrew' => 'iso-8859-8',
  'hebrew' => 'iso-8859-8',
  'iso_8859_8' => 'iso-8859-8',
  'iso_8859_8_1988' => 'iso-8859-8',
  'iso_ir_138' => 'iso-8859-8',

  # iso8859_9 codec
  'iso8859_9' => 'iso-8859-9',
  'csisolatin5' => 'iso-8859-9',
  'iso_8859_9' => 'iso-8859-9',
  'iso_8859_9_1989' => 'iso-8859-9',
  'iso_ir_148' => 'iso-8859-9',
  'l5' => 'iso-8859-9',
  'latin5' => 'iso-8859-9',

  # iso8859_11 codec
  'iso8859_11' => 'iso-8859-11',
  'thai' => 'iso-8859-11',
  'iso_8859_11' => 'iso-8859-11',
  'iso_8859_11_2001' => 'iso-8859-11',

  # iso8859_16 codec
  'iso8859_16' => 'iso-8859-16',
  'iso_8859_16' => 'iso-8859-16',
  'iso_8859_16_2001' => 'iso-8859-16',
  'iso_ir_226' => 'iso-8859-16',
  'l10' => 'iso-8859-16',
  'latin10' => 'iso-8859-16',

  # cskoi8r codec
  'koi8_r' => 'cskoi8r',

  # mac_cyrillic codec
  'mac_cyrillic' => 'maccyrillic',

  # shift_jis codec
  'csshiftjis' => 'shift_jis',
  'shiftjis' => 'shift_jis',
  'sjis' => 'shift_jis',
  's_jis' => 'shift_jis',

  # shift_jisx0213 codec
  'shiftjisx0213' => 'shift_jisx0213',
  'sjisx0213' => 'shift_jisx0213',
  's_jisx0213' => 'shift_jisx0213',

  # utf_16 codec
  'utf_16' => 'utf-16',
  'u16' => 'utf-16',
  'utf16' => 'utf-16',

  # utf_16_be codec
  'utf_16_be' => 'utf-16be',
  'unicodebigunmarked' => 'utf-16be',
  'utf_16be' => 'utf-16be',

  # utf_16_le codec
  'utf_16_le' => 'utf-16le',
  'unicodelittleunmarked' => 'utf-16le',
  'utf_16le' => 'utf-16le',

  # utf_7 codec
  'utf_7' => 'utf-7',
  'u7' => 'utf-7',
  'utf7' => 'utf-7',

  # utf_8 codec
  'utf_8' => 'utf-8',
  'u8' => 'utf-8',
  'utf' => 'utf-8',
  'utf8' => 'utf-8',
  'utf8_ucs2' => 'utf-8',
  'utf8_ucs4' => 'utf-8',
}
462
+
463
# Re-encodes a single string from +from_encoding+ into the target encoding
# named 'unicode' by delegating to uconvert.
#
# data          - the string to convert
# from_encoding - name (or alias) of the encoding +data+ is currently in
#
# Returns whatever uconvert returns for that conversion.
def unicode(data, from_encoding)
  target_encoding = 'unicode'
  uconvert(data, from_encoding, target_encoding)
end
468
+
469
# Converts +data+ between two encodings with Iconv.
#
# Both encoding names are first looked up in Encoding_Aliases; a name with no
# alias entry is passed to Iconv unchanged.
#
# data          - the string to convert
# from_encoding - encoding name (or alias) that +data+ is currently in
# to_encoding   - encoding name (or alias) to convert to (default 'utf-8')
#
# Returns the first element of the array produced by Iconv.iconv.
def uconvert(data, from_encoding, to_encoding = 'utf-8')
  source_name, target_name = [from_encoding, to_encoding].map do |name|
    Encoding_Aliases[name] || name
  end
  converted = Iconv.iconv(target_name, source_name, data)
  converted[0]
end
474
+
475
# Returns the string produced by packing the integer +i+ with the 'U*'
# (UTF-8 character) directive, i.e. the one-character string for code point +i+.
def unichr(i)
  codepoints = [i]
  codepoints.pack('U*')
end
478
+
479
# Finds the first match of +regexp+ in +stri+ at or after +offset+.
#
# stri   - the string to search
# regexp - the pattern to look for
# offset - index at which the search starts
#
# Returns two values: the index where the match starts and the MatchData for
# that match, or nil, nil when nothing matches.
def index_match(stri, regexp, offset)
  i = stri.index(regexp, offset)

  return nil, nil unless i

  # String#index has already performed the match and stored it in $~, so
  # reuse it instead of matching a second time against a copy of the tail.
  # The second match could disagree with the first for patterns that look at
  # preceding text (e.g. lookbehind), and it copied the rest of the string on
  # every call.
  return i, Regexp.last_match
end
489
+
490
# Converts +s+ from the "ebcdic-cp-be" encoding to "iso88591" using Iconv and
# returns the first element of the resulting array.
def _ebcdic_to_ascii(s)
  converted = Iconv.iconv("iso88591", "ebcdic-cp-be", s)
  converted[0]
end
493
+
494
# Resolves +uri+ against +base+ and returns the result as a String.
#
# Before joining, surplus slashes directly after "scheme://" are removed from
# +uri+ (e.g. "http:////host/x" becomes "http://host/x").
#
# base - the base URI string
# uri  - the (possibly relative) URI string to resolve
#
# Returns the joined URI. When URI.join raises URI::BadURIError and +base+ is
# itself relative, +uri+ is returned on its own (after the slash fix-up);
# otherwise nil is returned for that error. Other URI errors propagate.
def urljoin(base, uri)
  # \A anchors at the start of the string (Ruby's ^ would also match after
  # any newline), and the '-' sits last in the character class so it is a
  # literal hyphen rather than forming the range '+'..'.' (which admits ',').
  urifixer = /\A([A-Za-z][A-Za-z0-9+.-]*:\/\/)(\/*)(.*?)/u
  uri = uri.sub(urifixer, '\1\3')
  begin
    URI.join(base, uri).to_s
  rescue URI::BadURIError
    # Nothing usable to resolve against: fall back to the URI itself.
    URI.parse(uri).to_s if URI.parse(base).relative?
  end
end
505
+
506
# Builds a UTC Time from the first six elements of +pytuple+, which are passed
# to Time.utc in order (year, month, day, hour, minute, second). Any further
# elements are ignored.
#
# Returns a Time in UTC.
def py2rtime(pytuple)
  # Splat the slice: Time.utc takes the components as separate arguments and
  # raises TypeError when handed a single Array.
  Time.utc(*pytuple[0..5])
end
509
+
510
+ # http://intertwingly.net/stories/2005/09/28/xchar.rb
511
# Lookup tables used when turning integer character codes into XML-safe text.
module XChar
  # Replacement code points for Windows-1252 byte values 128..159.
  # Keys are the byte values, values are the Unicode code points.
  # http://intertwingly.net/stories/2004/04/14/i18n.html#CleaningWindows
  CP1252 = {
    128 => 8364, # U+20AC euro sign
    130 => 8218, # U+201A single low-9 quotation mark
    131 => 402,  # U+0192 latin small letter f with hook
    132 => 8222, # U+201E double low-9 quotation mark
    133 => 8230, # U+2026 horizontal ellipsis
    134 => 8224, # U+2020 dagger
    135 => 8225, # U+2021 double dagger
    136 => 710,  # U+02C6 modifier letter circumflex accent
    137 => 8240, # U+2030 per mille sign
    138 => 352,  # U+0160 latin capital letter s with caron
    139 => 8249, # U+2039 single left-pointing angle quotation mark
    140 => 338,  # U+0152 latin capital ligature oe
    142 => 381,  # U+017D latin capital letter z with caron
    145 => 8216, # U+2018 left single quotation mark
    146 => 8217, # U+2019 right single quotation mark
    147 => 8220, # U+201C left double quotation mark
    148 => 8221, # U+201D right double quotation mark
    149 => 8226, # U+2022 bullet
    150 => 8211, # U+2013 en dash
    151 => 8212, # U+2014 em dash
    152 => 732,  # U+02DC small tilde
    153 => 8482, # U+2122 trade mark sign
    154 => 353,  # U+0161 latin small letter s with caron
    155 => 8250, # U+203A single right-pointing angle quotation mark
    156 => 339,  # U+0153 latin small ligature oe
    158 => 382,  # U+017E latin small letter z with caron
    159 => 376   # U+0178 latin capital letter y with diaeresis
  }

  # Code points that must be written as entity references in character data.
  # http://www.w3.org/TR/REC-xml/#dt-chardata
  PREDEFINED = {
    38 => '&amp;', # ampersand
    60 => '&lt;',  # left angle bracket
    62 => '&gt;'   # right angle bracket
  }

  # Code points that are legal in an XML document: one explicit list of
  # control characters followed by three ranges.
  # http://www.w3.org/TR/REC-xml/#charsets
  VALID = [
    [0x9, 0xA, 0xD],
    (0x20..0xD7FF),
    (0xE000..0xFFFD),
    (0x10000..0x10FFFF)
  ]
end
552
+
553
class Fixnum
  # XML-escaped version of chr.
  #
  # The receiver is first remapped through XChar::CP1252 (when it has an entry
  # there), then replaced by 42 ('*') if none of the XChar::VALID groups
  # contains it. The result is the XChar::PREDEFINED entity when one exists,
  # the plain character for codes below 128, and a numeric character
  # reference ("&#NNN;") for everything else.
  def xchr
    code = XChar::CP1252[self] || self
    code = 42 unless XChar::VALID.any? { |allowed| allowed.include?(code) }

    entity = XChar::PREDEFINED[code]
    return entity if entity

    code < 128 ? code.chr : "&##{code};"
  end
end
561
+
562
class String
  # Keep the built-in String#index reachable under the name old_index.
  alias :old_index :index

  # Returns an XML-escaped copy of the string, built by calling xchr on every
  # code unit.
  #
  # The string is unpacked as UTF-8 code points first; if that raises, it is
  # unpacked byte by byte instead.
  def to_xs
    begin
      codepoints = unpack('U*') # ASCII, UTF-8
      codepoints.map { |codepoint| codepoint.xchr }.join
    rescue
      octets = unpack('C*') # ISO-8859-1, WIN-1252
      octets.map { |octet| octet.xchr }.join
    end
  end
end
570
+
571
# Raised by BetterSGMLParser#error. Derives directly from Exception, so a
# bare `rescue` (which only covers StandardError) does not catch it.
class BetterSGMLParserError < Exception
end
572
# SGML/HTML tokenizer built on HTML::SGMLParser whose scanning loop and
# regular expressions follow the ones used by feedparser.py.
#
# The parse_* methods return the index just past the construct they consumed
# (parse_pi returns a length instead), or nil when the construct is not yet
# complete in the buffered data.
class BetterSGMLParser < HTML::SGMLParser
  # Replaced Tagfind and Charref Regexps with the ones in feedparser.py
  # This makes things work.
  Interesting = /[&<]/u
  Incomplete = Regexp.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' +
                              '<([a-zA-Z][^<>]*|/([a-zA-Z][^<>]*)?|' +
                              '![^<>]*)?', 64) # 64 is the unicode flag

  Entityref = /&([a-zA-Z][-.a-zA-Z0-9]*)[^-.a-zA-Z0-9]/u
  Charref = /&#(x?[0-9A-Fa-f]+)[^0-9A-Fa-f]/u

  # NOTE(review): both short-tag patterns start with a literal "'" character,
  # so they can never match at a position holding "<" and the short-tag branch
  # of parse_starttag is effectively dead. Left as-is on purpose: enabling it
  # would change how input such as "<br/>" is tokenised -- confirm before
  # touching.
  Shorttagopen = /'<[a-zA-Z][-.a-zA-Z0-9]*/u
  Shorttag = /'<([a-zA-Z][-.a-zA-Z0-9]*)\/([^\/]*)\//u
  Endtagopen = /<\//u # Matching the Python SGMLParser
  Endbracket = /[<>]/u
  Declopen = /<!/u
  Piopenbegin = /^<\?/u
  Piclose = />/u

  Commentopen = /<!--/u
  Commentclose = /--\s*>/u
  Tagfind = /[a-zA-Z][-_.:a-zA-Z0-9]*/u
  Attrfind = Regexp.compile('\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'+
                            '(\'[^\']*\'|"[^"]*"|[\]\[\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?',
                            64)
  Endtagfind = /\s*\/\s*>/u

  def initialize(verbose=false)
    super(verbose)
  end

  def feed(*args)
    super(*args)
  end

  # Scans the buffered @rawdata, dispatching text, tags, comments, processing
  # instructions, declarations and character/entity references to the handler
  # methods. Whatever could not be consumed stays in @rawdata.
  #
  # _end - when true, any unconsumed remainder is flushed as plain data.
  def goahead(_end)
    rawdata = @rawdata # woo, utf-8 magic
    i = 0
    n = rawdata.length
    while i < n
      if @nomoretags
        # handle_data_range does nothing more than set a "Range" that is never used. wtf?
        handle_data(rawdata[i...n]) # i...n means "range from i to n not including n"
        i = n
        break
      end
      # Everything up to the next '&' or '<' is plain data.
      j = rawdata.index(Interesting, i)
      j = n unless j
      handle_data(rawdata[i...j]) if i < j
      i = j
      break if (i == n)
      if rawdata[i..i] == '<'
        if rawdata.index(Starttagopen, i) == i
          if @literal
            handle_data(rawdata[i..i])
            i = i+1
            next
          end
          k = parse_starttag(i)
          break unless k
          i = k
          next
        end
        if rawdata.index(Endtagopen, i) == i
          k = parse_endtag(i)
          break unless k
          i = k
          @literal = false
          next
        end
        if @literal
          if n > (i+1)
            handle_data("<")
            i = i+1
          else
            # incomplete
            break
          end
          next
        end
        if rawdata.index(Commentopen, i) == i
          k = parse_comment(i)
          break unless k
          i = k
          next
        end
        if rawdata.index(Piopenbegin, i) == i # Like Piopen but anchored with ^
          k = parse_pi(i)
          break unless k
          i += k # parse_pi returns a length, not an index
          next
        end
        if rawdata.index(Declopen, i) == i
          # This is some sort of declaration; in "HTML as
          # deployed," this should only be the document type
          # declaration ("<!DOCTYPE html...>").
          k = parse_declaration(i)
          break unless k
          i = k
          next
        end
      elsif rawdata[i..i] == '&'
        if @literal # FIXME BUGME SGMLParser totally does not check this. Bug it.
          handle_data(rawdata[i..i])
          i += 1
          next
        end

        # the Char must come first as its #=~ method is the only one that is UTF-8 safe
        ni, match = index_match(rawdata, Charref, i)
        if ni && ni == i
          handle_charref(match[1]) # first capture group: the digits
          i += match[0].length     # whole match, including the terminator
          # The pattern also swallows one terminating character; give it
          # back unless it was the ';' that closes the reference.
          i -= 1 unless rawdata[i-1..i-1] == ";"
          next
        end
        ni, match = index_match(rawdata, Entityref, i)
        if ni && ni == i
          handle_entityref(match[1])
          i += match[0].length
          i -= 1 unless rawdata[i-1..i-1] == ";"
          next
        end
      else
        error('neither < nor & ??')
      end
      # We get here only if incomplete matches but
      # nothing else
      ni, match = index_match(rawdata, Incomplete, i)
      # NOTE(review): this compares against 0 rather than i, so for i > 0 the
      # character is always emitted as data. Comparing against i looks like
      # the intent, but it would make trailing text such as "&T" wait for
      # more input instead of being emitted -- confirm before changing.
      unless ni && ni == 0
        handle_data(rawdata[i...i+1]) # str[i...i+1] == str[i..i]
        i += 1
        next
      end
      j = ni + match[0].length
      break if j == n # Really incomplete
      handle_data(rawdata[i...j])
      i = j
    end # end while

    if _end && i < n
      handle_data(rawdata[i...n])
      i = n
    end

    @rawdata = rawdata[i..-1]
    # @offset += i # FIXME BUGME another unused variable in SGMLParser?
  end

  # Internal -- parse processing instruction starting at index i.
  # Returns the length consumed, or nil if the instruction is not terminated.
  def parse_pi(i)
    rawdata = @rawdata
    if rawdata[i...i+2] != '<?'
      error("unexpected call to parse_pi()")
    end
    ni, match = index_match(rawdata, Piclose, i+2)
    return nil unless match
    j = ni
    handle_pi(rawdata[i+2...j])
    j = (j + match[0].length)
    return j-i
  end

  # Internal -- parse the comment starting at index i.
  # Returns the index just past the closing "-->", or nil if it is not closed.
  def parse_comment(i)
    rawdata = @rawdata
    if rawdata[i...i+4] != "<!--"
      error("unexpected call to parse_comment()")
    end
    # NOTE(review): the search starts at i, so the dashes of the opener can
    # take part in the close match (e.g. "<!-->") -- confirm this is wanted.
    ni, match = index_match(rawdata, Commentclose, i)
    return nil unless match
    handle_comment(rawdata[i+4..(ni-1)])
    return ni+match[0].length # Index just past the closing comment tag
  end

  # Internal -- parse the start tag beginning at index i, collect its
  # attributes as [name, value] pairs and hand them to finish_starttag.
  # Returns the index just past the tag, or nil if the tag is incomplete.
  def parse_starttag(i)
    @_starttag_text = nil
    start_pos = i
    rawdata = @rawdata
    ni, match = index_match(rawdata, Shorttagopen, i)
    if ni == i
      # SGML shorthand: <tag/data/ == <tag>data</tag>
      # XXX Can data contain &... (entity or char refs)?
      # XXX Can data contain < or > (tag characters)?
      # XXX Can there be whitespace before the first /?
      k, match = index_match(rawdata, Shorttag, i)
      return nil unless match
      tag, data = match[1], match[2]
      @_starttag_text = "<#{tag}/"
      tag.downcase!
      second_end = rawdata.index(Shorttagopen, k)
      finish_shorttag(tag, data)
      @_starttag_text = rawdata[start_pos...second_end+1]
      return k
    end

    j = rawdata.index(Endbracket, i+1)
    return nil unless j
    attrsd = []
    if rawdata[i...i+2] == '<>'
      # SGML shorthand: <> == <last open tag seen>
      k = j
      tag = @lasttag
    else
      ni, match = index_match(rawdata, Tagfind, i+1)
      unless match
        error('unexpected call to parse_starttag')
      end
      k = ni+match[0].length+1
      tag = match[0].downcase
      @lasttag = tag
    end

    while k < j
      break if rawdata.index(Endtagfind, k) == k
      ni, match = index_match(rawdata, Attrfind, k)
      break unless ni
      matched_length = match[0].length
      attrname, rest, attrvalue = match[1], match[2], match[3]
      if rest.nil? || rest.empty?
        attrvalue = '' # was: = attrname # Why the change?
      elsif (attrvalue[0..0] == "'" && attrvalue[-1..-1] == "'") ||
            (attrvalue[0..0] == '"' && attrvalue[-1..-1] == '"')
        # Strip the surrounding quotes. The comparison uses one-character
        # strings on both sides: ?' is a Fixnum on Ruby 1.8, so comparing
        # it with attrvalue[0..0] never matched and single-quoted values
        # kept their quotes there.
        attrvalue = attrvalue[1...-1]
      end
      attrsd << [attrname.downcase, attrvalue]
      k += matched_length
    end
    if rawdata[j..j] == ">"
      j += 1
    end
    @_starttag_text = rawdata[start_pos...j]
    finish_starttag(tag, attrsd)
    return j
  end

  # Internal -- parse the end tag beginning at index i and pass its
  # lower-cased name to finish_endtag.
  # Returns the index just past the tag, or nil if no bracket follows.
  def parse_endtag(i)
    rawdata = @rawdata
    j, match = index_match(rawdata, /[<>]/, i+1)
    return nil unless j
    tag = rawdata[i+2...j].strip.downcase
    if rawdata[j..j] == ">"
      j += 1
    end
    finish_endtag(tag)
    return j
  end

  def output
    # Return processed HTML as a single string
    return @pieces.map{|p| p.to_s}.join
  end

  # Raises BetterSGMLParserError carrying +message+.
  def error(message)
    raise BetterSGMLParserError.new(message)
  end

  # Processing instructions are ignored.
  def handle_pi(text)
  end

  # Declarations are ignored.
  def handle_decl(text)
  end
end
831
+
832
+ # Add some helper methods to make AttributeList (all of those damn attrs
833
+ # and attrsD used by StrictFeedParser) act more like a Hash.
834
+ # NOTE AttributeList is still Read-Only (AFAICT).
835
+ # Monkey patching is terrible, and I have an addiction.
836
module XML
  module SAX
    # Hash-flavoured read helpers layered on the getLength / getName /
    # getValue accessors of an attribute list.
    module AttributeList # in xml/sax.rb
      # Looks a value up through getValue(key).
      def [](key)
        getValue(key)
      end

      # Yields a [name, value] pair for every position in the list.
      def each(&blk)
        (0...getLength).each do |position|
          yield [getName(position), getValue(position)]
        end
      end

      # Yields the name stored at every position.
      def each_key(&blk)
        (0...getLength).each do |position|
          yield getName(position)
        end
      end

      # Yields the value stored at every position.
      def each_value(&blk)
        (0...getLength).each do |position|
          yield getValue(position)
        end
      end

      # Returns the list as an array of [name, value] pairs.
      def to_a # Rather use collect? grep for to_a.collect
        pairs = []
        each { |name, value| pairs.push([name, value]) }
        pairs
      end

      # Returns a "{ name => value, ... }" rendering of the list.
      def to_s
        rendered = []
        each { |name, value| rendered.push("#{name} => #{value}") }
        "{ " + rendered.join(", ") + " }"
      end
    end
  end
end
869
+ # This adds a nice scrub method to Hpricot, so we don't need a _HTMLSanitizer class
870
+ # http://underpantsgnome.com/2007/01/20/hpricot-scrub
871
+ # I have modified it to check for attributes that are only allowed if they are in a certain tag
872
+ module Hpricot
873
+ Acceptable_Elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b',
874
+ 'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite',
875
+ 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt',
876
+ 'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
877
+ 'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map',
878
+ 'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp',
879
+ 'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
880
+ 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u',
881
+ 'ul', 'var'
882
+ ]
883
+
884
+ Acceptable_Attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
885
+ 'action', 'align', 'alt', 'axis', 'border', 'cellpadding',
886
+ 'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class',
887
+ 'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime',
888
+ 'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height',
889
+ 'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang',
890
+ 'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name',
891
+ 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev',
892
+ 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
893
+ 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
894
+ 'type', 'usemap', 'valign', 'value', 'vspace', 'width', 'xml:lang'
895
+ ]
896
+
897
+ Unacceptable_Elements_With_End_Tag = ['script', 'applet']
898
+
899
+ Acceptable_Css_Properties = ['azimuth', 'background-color',
900
+ 'border-bottom-color', 'border-collapse', 'border-color',
901
+ 'border-left-color', 'border-right-color', 'border-top-color', 'clear',
902
+ 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
903
+ 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
904
+ 'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
905
+ 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
906
+ 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
907
+ 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
908
+ 'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
909
+ 'white-space', 'width'
910
+ ]
911
+
912
+ # survey of common keywords found in feeds
913
+ Acceptable_Css_Keywords = ['auto', 'aqua', 'black', 'block', 'blue',
914
+ 'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
915
+ 'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
916
+ 'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
917
+ 'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
918
+ 'transparent', 'underline', 'white', 'yellow'
919
+ ]
920
+
921
+ Mathml_Elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
922
+ 'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
923
+ 'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
924
+ 'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
925
+ 'munderover', 'none'
926
+ ]
927
+
928
+ Mathml_Attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
929
+ 'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
930
+ 'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
931
+ 'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
932
+ 'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
933
+ 'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
934
+ 'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
935
+ 'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
936
+ 'xlink:type', 'xmlns', 'xmlns:xlink'
937
+ ]
938
+
939
+ # svgtiny - foreignObject + linearGradient + radialGradient + stop
940
+ Svg_Elements = ['a', 'animate', 'animateColor', 'animateMotion',
941
+ 'animateTransform', 'circle', 'defs', 'desc', 'ellipse', 'font-face',
942
+ 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern', 'image',
943
+ 'linearGradient', 'line', 'metadata', 'missing-glyph', 'mpath', 'path',
944
+ 'polygon', 'polyline', 'radialGradient', 'rect', 'set', 'stop', 'svg',
945
+ 'switch', 'text', 'title', 'use'
946
+ ]
947
+
948
+ # svgtiny + class + opacity + offset + xmlns + xmlns:xlink
949
+ Svg_Attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
950
+ 'arabic-form', 'ascent', 'attributeName', 'attributeType',
951
+ 'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
952
+ 'class', 'color', 'color-rendering', 'content', 'cx', 'cy', 'd',
953
+ 'descent', 'display', 'dur', 'end', 'fill', 'fill-rule', 'font-family',
954
+ 'font-size', 'font-stretch', 'font-style', 'font-variant',
955
+ 'font-weight', 'from', 'fx', 'fy', 'g1', 'g2', 'glyph-name',
956
+ 'gradientUnits', 'hanging', 'height', 'horiz-adv-x', 'horiz-origin-x',
957
+ 'id', 'ideographic', 'k', 'keyPoints', 'keySplines', 'keyTimes',
958
+ 'lang', 'mathematical', 'max', 'min', 'name', 'offset', 'opacity',
959
+ 'origin', 'overline-position', 'overline-thickness', 'panose-1',
960
+ 'path', 'pathLength', 'points', 'preserveAspectRatio', 'r',
961
+ 'repeatCount', 'repeatDur', 'requiredExtensions', 'requiredFeatures',
962
+ 'restart', 'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv',
963
+ 'stop-color', 'stop-opacity', 'strikethrough-position',
964
+ 'strikethrough-thickness', 'stroke', 'stroke-dasharray',
965
+ 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin',
966
+ 'stroke-miterlimit', 'stroke-width', 'systemLanguage', 'target',
967
+ 'text-anchor', 'to', 'transform', 'type', 'u1', 'u2',
968
+ 'underline-position', 'underline-thickness', 'unicode',
969
+ 'unicode-range', 'units-per-em', 'values', 'version', 'viewBox',
970
+ 'visibility', 'width', 'widths', 'x', 'x-height', 'x1', 'x2',
971
+ 'xlink:actuate', 'xlink:arcrole', 'xlink:href', 'xlink:role',
972
+ 'xlink:show', 'xlink:title', 'xlink:type', 'xml:base', 'xml:lang',
973
+ 'xml:space', 'xmlns', 'xmlns:xlink', 'y', 'y1', 'y2', 'zoomAndPan'
974
+ ]
975
+
976
+ Svg_Attr_Map = nil
977
+ Svg_Elem_Map = nil
978
+
979
+ Acceptable_Svg_Properties = [ 'fill', 'fill-opacity', 'fill-rule',
980
+ 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
981
+ 'stroke-opacity'
982
+ ]
983
+
984
# Outside compatibility mode, map every MathML and SVG element name to the
# attribute whitelist that applies to that element family.
#
# The whitelists are the constants defined above (Mathml_Elements,
# Mathml_Attributes, Svg_Elements, Svg_Attributes); the previous code read
# class variables of the same spelling that were never defined.
unless $compatible
  @@acceptable_tag_specific_attributes = {}
  Mathml_Elements.each { |e| @@acceptable_tag_specific_attributes[e] = Mathml_Attributes }
  Svg_Elements.each { |e| @@acceptable_tag_specific_attributes[e] = Svg_Attributes }
end
989
+
990
class Elements
  # Forward strip to every member of the set.
  # I completely route around this with the recursive_strip in Doc
  def strip(allowed_tags=[])
    each do |node|
      node.strip(allowed_tags)
    end
  end

  # Forward strip_attributes, with the list of safe attribute names, to
  # every member of the set.
  def strip_attributes(safe=[])
    each do |node|
      node.strip_attributes(safe)
    end
  end

  # Forward strip_style, with the permitted CSS properties and keywords, to
  # every member of the set.
  def strip_style(ok_props=[], ok_keywords=[])
    each do |node|
      node.strip_style(ok_props, ok_keywords)
    end
  end
end
1003
+
1004
# Text nodes have nothing to strip; both hooks are deliberate no-ops that
# accept (and ignore) one argument.
class Text
  def strip(foo); end

  def strip_attributes(foo); end
end
1010
# Comment nodes have nothing to strip; both hooks are deliberate no-ops that
# accept (and ignore) one argument.
class Comment
  def strip(foo); end

  def strip_attributes(foo); end
end
1016
# Bogus end tags have nothing to strip; both hooks are deliberate no-ops
# that accept (and ignore) one argument.
class BogusETag
  def strip(foo); end

  def strip_attributes(foo); end
end
1022
+
1023
class Elem
  # Recursively ask every child node to decode its entities.
  def decode_entities
    children.each { |x| x.decode_entities }
  end

  # Replace this element with the markup of its children (if it has any).
  def cull
    swap(children.to_s) if children
  end

  # Cull the element when strip_removes? says so.
  #
  # +allowed_tags+ is accepted (and ignored) so that Elements#strip, which
  # passes one argument to every node, can call this method just like the
  # one-argument Text/Comment/BogusETag variants.
  def strip(allowed_tags=[])
    cull if strip_removes?
  end

  # Remove every attribute whose name is not listed in +safe+.
  #
  # +safe+ defaults to Acceptable_Attributes, so a bare call behaves as
  # before; the parameter lets Elements#strip_attributes pass its own list.
  def strip_attributes(safe=Acceptable_Attributes)
    return if attributes.nil?
    # Collect the names first so nothing is removed while iterating.
    names = attributes.map { |atr| atr[0] }
    names.each do |attr_name|
      remove_attribute(attr_name) unless safe.include?(attr_name)
    end
  end

  # Truthy when the element's type attribute mentions "script" or "css".
  def strip_removes?
    # I'm sure there are others that should be ripped instead of stripped
    attributes && attributes['type'] =~ /script|css/
  end
end
1055
+ end
1056
+
1057
+ module FeedParser
1058
+ Version = "0.1aleph_naught"
1059
+
1060
+ License = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
1061
+
1062
+ Redistribution and use in source and binary forms, with or without modification,
1063
+ are permitted provided that the following conditions are met:
1064
+
1065
+ * Redistributions of source code must retain the above copyright notice,
1066
+ this list of conditions and the following disclaimer.
1067
+ * Redistributions in binary form must reproduce the above copyright notice,
1068
+ this list of conditions and the following disclaimer in the documentation
1069
+ and/or other materials provided with the distribution.
1070
+
1071
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
1072
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1073
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1074
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
1075
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
1076
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
1077
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
1078
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
1079
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
1080
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1081
+ POSSIBILITY OF SUCH DAMAGE."""
1082
+
1083
+ Author = "Jeff Hodges <http://somethingsimilar.com>"
1084
+ Copyright_Holder = "Mark Pilgrim <http://diveintomark.org/>"
1085
+ Contributors = [ "Jason Diamond <http://injektilo.org/>",
1086
+ "John Beimler <http://john.beimler.org/>",
1087
+ "Fazal Majid <http://www.majid.info/mylos/weblog/>",
1088
+ "Aaron Swartz <http://aaronsw.com/>",
1089
+ "Kevin Marks <http://epeus.blogspot.com/>"
1090
+ ]
1091
+ # HTTP "User-Agent" header to send to servers when downloading feeds.
1092
+ # If you are embedding feedparser in a larger application, you should
1093
+ # change this to your application name and URL.
1094
# Interpolate the Version constant declared above; @version is an unset
# (nil) instance variable in the module body and rendered as an empty string.
USER_AGENT = "UniversalFeedParser/%s +http://feedparser.org/" % Version
1095
+
1096
+ # HTTP "Accept" header to send to servers when downloading feeds. If you don't
1097
+ # want to send an Accept header, set this to nil.
1098
+ ACCEPT_HEADER = "application/atom+xml,application/rdf+xml,application/rss+xml,application/x-netcdf,application/xml;q=0.9,text/xml;q=0.2,*/*;q=0.1"
1099
+
1100
+
1101
+ # If you want feedparser to automatically run HTML markup through HTML Tidy, set
1102
+ # this to true. Requires mxTidy <http://www.egenix.com/files/python/mxTidy.html>
1103
+ # or utidylib <http://utidylib.berlios.de/>.
1104
+ TIDY_MARKUP = false #FIXME untranslated
1105
+
1106
+ # List of Python interfaces for HTML Tidy, in order of preference. Only useful
1107
+ # if TIDY_MARKUP = true
1108
+ PREFERRED_TIDY_INTERFACES = ["uTidy", "mxTidy"] #FIXME untranslated
1109
+
1110
+ # The original Python import. I'm using it to help translate
1111
+ #import sgmllib, re, sys, copy, urlparse, time, rfc822, types, cgi, urllib, urllib2
1112
+
1113
+
1114
+
1115
+ # ---------- don't touch these ----------
1116
# Common ancestor of the three bookkeeping exceptions declared right below.
class ThingsNobodyCaresAboutButMe < Exception; end

class CharacterEncodingOverride < ThingsNobodyCaresAboutButMe; end

class CharacterEncodingUnknown < ThingsNobodyCaresAboutButMe; end

class NonXMLContentType < ThingsNobodyCaresAboutButMe; end

# Stands apart from the family above: derives directly from Exception.
class UndeclaredNamespace < Exception; end
1126
+
1127
+
1128
+ SUPPORTED_VERSIONS = {'' => 'unknown',
1129
+ 'rss090' => 'RSS 0.90',
1130
+ 'rss091n' => 'RSS 0.91 (Netscape)',
1131
+ 'rss091u' => 'RSS 0.91 (Userland)',
1132
+ 'rss092' => 'RSS 0.92',
1133
+ 'rss093' => 'RSS 0.93',
1134
+ 'rss094' => 'RSS 0.94',
1135
+ 'rss20' => 'RSS 2.0',
1136
+ 'rss10' => 'RSS 1.0',
1137
+ 'rss' => 'RSS (unknown version)',
1138
+ 'atom01' => 'Atom 0.1',
1139
+ 'atom02' => 'Atom 0.2',
1140
+ 'atom03' => 'Atom 0.3',
1141
+ 'atom10' => 'Atom 1.0',
1142
+ 'atom' => 'Atom (unknown version)',
1143
+ 'cdf' => 'CDF',
1144
+ 'hotrss' => 'Hot RSS'
1145
+ }
1146
class FeedParserDict < Hash
  # The same piece of information (say, "when was this feed last updated?")
  # goes by different names depending on the kind of feed being handled.
  # This class lets a caller use the name they expect for a given feed type
  # while letting other code rely on a single name for every feed type.
  #
  # @@keymap maps such alias names to the key (or list of candidate keys)
  # that actually holds the data. #[] and #[]= consult it to find out which
  # key the caller "really means" when they ask for one of the aliases.
  @@keymap = {'channel' => 'feed',
              'items' => 'entries',
              'guid' => 'id',
              'date' => 'updated',
              'date_parsed' => 'updated_parsed',
              'description' => ['subtitle', 'summary'],
              'url' => ['href'],
              'modified' => 'updated',
              'modified_parsed' => 'updated_parsed',
              'issued' => 'published',
              'issued_parsed' => 'published_parsed',
              'copyright' => 'rights',
              'copyright_detail' => 'rights_detail',
              'tagline' => 'subtitle',
              'tagline_detail' => 'subtitle_detail'}

  # Hash (through Enumerable) already has an #entries method, so the
  # method_missing shortcut would never fire for it; define it explicitly.
  def entries
    self['entries']
  end

  # Build a dict, optionally pre-populated.
  #
  # pairs - nil, an Array of [key, value] pairs (routed through #[]= so
  #         aliases are remapped), or a Hash / Hash subclass (merged as-is).
  def initialize(pairs=nil)
    super()
    if pairs.is_a?(Array) and pairs[0].is_a?(Array) and pairs[0].length == 2
      pairs.each { |k, v| self[k] = v }
    elsif pairs.is_a?(Hash)
      merge!(pairs)
    end
  end

  # Alias-aware lookup.
  #
  # 'category' and 'categories' are derived from the 'tags' entry and yield
  # nil when no tags are stored. For any other key, a key that is literally
  # present wins over the key its alias maps to.
  def [](key)
    if key == 'category'
      tags = self['tags']
      return nil if tags.nil? or tags.empty?
      return tags[0]['term']
    end
    if key == 'categories'
      tags = self['tags']
      return nil if tags.nil?
      return tags.collect { |tag| [tag['scheme'], tag['term']] }
    end
    realkey = @@keymap[key] || key
    if realkey.is_a?(Array)
      # The block variable must not reuse the name +key+: on Ruby 1.8 that
      # would overwrite the method parameter used below.
      realkey.each { |candidate| return self[candidate] if has_key?(candidate) }
    end
    # Note that the original key is preferred over the realkey we (might
    # have) found in @@keymap
    return super(key) if has_key?(key)
    super(realkey)
  end

  # Alias-aware store: an alias is replaced by its mapped key (the first
  # candidate when the mapping is a list) before storing.
  def []=(key, value)
    if @@keymap.key?(key)
      key = @@keymap[key]
      key = key[0] if key.is_a?(Array)
    end
    super(key, value)
  end

  # Attribute-style access: dict.title reads self['title'] and
  # dict.title = v writes it. Names ending in "!" or "?" or starting with
  # "_" are rejected with NoMethodError.
  def method_missing(msym, *args)
    methodname = msym.to_s
    # str[n, 1] yields a one-character String on every Ruby version,
    # whereas str[n] is an Integer on 1.8.
    first = methodname[0, 1]
    last = methodname[-1, 1]
    if last == '='
      return self[methodname[0..-2]] = args[0]
    elsif last != '!' and last != '?' and first != "_" # FIXME implement with private
      return self[methodname]
    else
      raise NoMethodError, "whoops, we don't know about the attribute or method called `#{methodname}' for #{self}:#{self.class}"
    end
  end
end
1234
+
1235
+
1236
+
1237
+
1238
+ module FeedParserMixin
1239
+ attr_accessor :feeddata, :version, :namespacesInUse, :date_handlers
1240
+
1241
# Reset every piece of parser state.
#
# baseuri  - initial base URI (stored as '' when nil)
# baselang - initial language; when given it is also recorded, with
#            underscores turned into hyphens, as the feed's 'language'
# encoding - character encoding name remembered in @encoding
def startup(baseuri=nil, baselang=nil, encoding='utf-8')
  $stderr << "initializing FeedParser\n" if $debug

  # namespace URI => prefix used in handler names ('' means core element)
  @namespaces = {
    '' => '',
    'http://backend.userland.com/rss' => '',
    'http://blogs.law.harvard.edu/tech/rss' => '',
    'http://purl.org/rss/1.0/' => '',
    'http://my.netscape.com/rdf/simple/0.9/' => '',
    'http://example.com/newformat#' => '',
    'http://example.com/necho' => '',
    'http://purl.org/echo/' => '',
    'uri/of/echo/namespace#' => '',
    'http://purl.org/pie/' => '',
    'http://purl.org/atom/ns#' => '',
    'http://www.w3.org/2005/Atom' => '',
    'http://purl.org/rss/1.0/modules/rss091#' => '',
    'http://webns.net/mvcb/' => 'admin',
    'http://purl.org/rss/1.0/modules/aggregation/' => 'ag',
    'http://purl.org/rss/1.0/modules/annotate/' => 'annotate',
    'http://media.tangent.org/rss/1.0/' => 'audio',
    'http://backend.userland.com/blogChannelModule' => 'blogChannel',
    'http://web.resource.org/cc/' => 'cc',
    'http://backend.userland.com/creativeCommonsRssModule' => 'creativeCommons',
    'http://purl.org/rss/1.0/modules/company' => 'co',
    'http://purl.org/rss/1.0/modules/content/' => 'content',
    'http://my.theinfo.org/changed/1.0/rss/' => 'cp',
    'http://purl.org/dc/elements/1.1/' => 'dc',
    'http://purl.org/dc/terms/' => 'dcterms',
    'http://purl.org/rss/1.0/modules/email/' => 'email',
    'http://purl.org/rss/1.0/modules/event/' => 'ev',
    'http://rssnamespace.org/feedburner/ext/1.0' => 'feedburner',
    'http://freshmeat.net/rss/fm/' => 'fm',
    'http://xmlns.com/foaf/0.1/' => 'foaf',
    'http://www.w3.org/2003/01/geo/wgs84_pos#' => 'geo',
    'http://postneo.com/icbm/' => 'icbm',
    'http://purl.org/rss/1.0/modules/image/' => 'image',
    'http://www.itunes.com/DTDs/PodCast-1.0.dtd' => 'itunes',
    'http://example.com/DTDs/PodCast-1.0.dtd' => 'itunes',
    'http://purl.org/rss/1.0/modules/link/' => 'l',
    'http://search.yahoo.com/mrss' => 'media',
    'http://madskills.com/public/xml/rss/module/pingback/' => 'pingback',
    'http://prismstandard.org/namespaces/1.2/basic/' => 'prism',
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => 'rdf',
    'http://www.w3.org/2000/01/rdf-schema#' => 'rdfs',
    'http://purl.org/rss/1.0/modules/reference/' => 'ref',
    'http://purl.org/rss/1.0/modules/richequiv/' => 'reqv',
    'http://purl.org/rss/1.0/modules/search/' => 'search',
    'http://purl.org/rss/1.0/modules/slash/' => 'slash',
    'http://schemas.xmlsoap.org/soap/envelope/' => 'soap',
    'http://purl.org/rss/1.0/modules/servicestatus/' => 'ss',
    'http://hacks.benhammersley.com/rss/streaming/' => 'str',
    'http://purl.org/rss/1.0/modules/subscription/' => 'sub',
    'http://purl.org/rss/1.0/modules/syndication/' => 'sy',
    'http://purl.org/rss/1.0/modules/taxonomy/' => 'taxo',
    'http://purl.org/rss/1.0/modules/threading/' => 'thr',
    'http://purl.org/rss/1.0/modules/textinput/' => 'ti',
    'http://madskills.com/public/xml/rss/module/trackback/' => 'trackback',
    'http://wellformedweb.org/commentAPI/' => 'wfw',
    'http://purl.org/rss/1.0/modules/wiki/' => 'wiki',
    'http://www.w3.org/1999/xhtml' => 'xhtml',
    'http://www.w3.org/XML/1998/namespace' => 'xml',
    'http://www.w3.org/1999/xlink' => 'xlink',
    'http://schemas.pocketsoap.com/rss/myDescModule/' => 'szf'
  }

  # Same table keyed by the lowercased URI, for case-insensitive matching.
  @matchnamespaces = {}
  @namespaces.each do |ns_uri, ns_prefix|
    @matchnamespaces[ns_uri.downcase] = ns_prefix
  end

  # Element classification lists consulted by pop.
  @can_be_relative_uri = ['link', 'id', 'wfw_comment', 'wfw_commentrss', 'docs', 'url', 'href', 'comments', 'license', 'icon', 'logo']
  @can_contain_relative_uris = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']
  @can_contain_dangerous_markup = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']
  @html_types = ['text/html', 'application/xhtml+xml']

  @feeddata = FeedParserDict.new # feed-level data
  @encoding = encoding           # character encoding
  @entries = []                  # list of entry-level data
  @version = ''                  # feed type/version, see SUPPORTED_VERSIONS
  @namespacesInUse = {}          # hash of namespaces defined by the feed

  # the following are used internally to track state;
  # this is really out of control and should be refactored
  @infeed = false
  @inentry = false
  @incontent = 0 # Yes, this needs to be zero until I work out popContent and pushContent
  @intextinput = false
  @inimage = false
  @inauthor = false
  @incontributor = false
  @inpublisher = false
  @insource = false
  @sourcedata = FeedParserDict.new
  @contentparams = FeedParserDict.new
  @summaryKey = nil
  @namespacemap = {}
  @elementstack = []
  @basestack = []
  @langstack = []
  @baseuri = baseuri || ''
  @lang = baselang || nil
  @feeddata['language'] = baselang.gsub('_', '-') if baselang

  # Date parsers, in the order listed here.
  @date_handlers = [
    :_parse_date_rfc822,
    :_parse_date_hungarian,
    :_parse_date_greek,
    :_parse_date_mssql,
    :_parse_date_nate,
    :_parse_date_onblog,
    :_parse_date_w3dtf,
    :_parse_date_iso8601
  ]
  $stderr << "Leaving startup\n" if $debug # My addition
end
1348
+
1349
# Handle a start tag.
#
# tag    - tag name, possibly carrying a "prefix:" part
# attrsd - attributes as a Hash, or as a list of [name, value] pairs
#          (LooseFeedParser's SGMLParser sends attrs as a list of lists,
#          like [['type','text/html'],['mode','escaped']])
#
# Tracks xml:base / xml:lang and namespace declarations, echoes the tag
# into the current element when inside XHTML content, and otherwise
# dispatches to a "_start_<prefix_><name>" handler, falling back to push.
def unknown_starttag(tag, attrsd)
  $stderr << "start #{tag} with #{attrsd}\n" if $debug
  attrsd = Hash[*attrsd.flatten] if attrsd.class == Array

  # Normalize attributes: lowercase every key, and lowercase the value of
  # 'rel' and 'type'. A copy is lowercased so the caller's strings are
  # left untouched.
  attrsD = {}
  attrsd.each do |old_k, value|
    k = old_k.downcase
    value = value.downcase if value and ['rel', 'type'].include?(k)
    attrsD[k] = value
  end

  # track xml:base and xml:lang
  baseuri = attrsD['xml:base'] || attrsD['base'] || @baseuri
  @baseuri = urljoin(@baseuri, baseuri)
  lang = attrsD['xml:lang'] || attrsD['lang']
  if lang == ''
    # xml:lang could be explicitly set to '', we need to capture that
    lang = nil
  elsif lang.nil?
    # if no xml:lang is specified, use parent lang
    lang = @lang
  end
  if lang and not lang.empty?
    if ['feed', 'rss', 'rdf:RDF'].include?(tag)
      @feeddata['language'] = lang.gsub('_', '-')
    end
  end
  @lang = lang
  @basestack << @baseuri
  @langstack << lang

  # track namespaces
  attrsd.each do |prefix, uri|
    if /^xmlns:/ =~ prefix # prefix begins with xmlns:
      trackNamespace(prefix[6..-1], uri)
    elsif prefix == 'xmlns'
      trackNamespace(nil, uri)
    end
  end

  # track inline content
  if @incontent != 0 and @contentparams.has_key?('type') and not (/xml$/ =~ (@contentparams['type'] || 'xml'))
    # element declared itself as escaped markup, but isn't really
    @contentparams['type'] = 'application/xhtml+xml'
  end
  if @incontent != 0 and @contentparams['type'] == 'application/xhtml+xml'
    # Note: probably shouldn't simply recreate localname here, but
    # our namespace handling isn't actually 100% correct in cases where
    # the feed redefines the default namespace (which is actually
    # the usual case for inline content, thanks Sam), so here we
    # cheat and just reconstruct the element based on localname
    # because that compensates for the bugs in our namespace handling.
    # This will horribly munge inline content with non-empty qnames,
    # but nobody actually does that, so I'm not fixing it.
    tag = tag.split(':')[-1]
    # Each attribute carries its own leading space, so a tag without
    # attributes is emitted as "<p>" rather than "<p >".
    attrsS = attrsd.to_a.collect { |l| " #{l[0]}=\"#{l[1]}\"" }.join
    return handle_data("<#{tag}#{attrsS}>", false)
  end

  # match namespaces
  if /:/ =~ tag
    prefix, suffix = tag.split(':', 2)
  else
    prefix, suffix = '', tag
  end
  prefix = @namespacemap[prefix] || prefix
  if prefix and not prefix.empty?
    prefix = prefix + '_'
  end

  # special hack for better tracking of empty textinput/image elements in
  # illformed feeds: any unprefixed tag that cannot belong to a textinput /
  # image ends that state.
  unprefixed = (prefix.nil? or prefix.empty?)
  if unprefixed and not ['title', 'link', 'description', 'name'].include?(tag)
    @intextinput = false
  end
  if unprefixed and not ['title', 'link', 'description', 'url', 'href', 'width', 'height'].include?(tag)
    @inimage = false
  end

  # call special handler (if defined) or default handler
  # NOTE(review): this rescue also hides a NoMethodError raised inside an
  # existing handler; kept as-is because callers may rely on the fallback.
  begin
    return send('_start_' + prefix + suffix, attrsD)
  rescue NoMethodError
    return push(prefix + suffix, true)
  end
end # End unknown_starttag
1442
+
1443
# Handle an end tag.
#
# Dispatches to a "_end_<prefix_><name>" handler (falling back to pop),
# echoes the closing tag into the current element when inside XHTML
# content, and finally restores the base URI / language of the enclosing
# element.
def unknown_endtag(tag)
  $stderr << "end #{tag}\n" if $debug
  # match namespaces
  if tag.index(':')
    prefix, suffix = tag.split(':', 2)
  else
    prefix, suffix = '', tag
  end
  prefix = @namespacemap[prefix] || prefix
  if prefix and not prefix.empty?
    prefix = prefix + '_'
  end

  # call special handler (if defined) or default handler
  begin
    send('_end_' + prefix + suffix) # NOTE no return here! do not add it!
  rescue NoMethodError
    pop(prefix + suffix)
  end

  # track inline content
  # Same test as in unknown_starttag: only a declared type that does NOT
  # end in "xml" counts as "escaped markup that isn't really escaped".
  if @incontent != 0 and @contentparams.has_key?('type') and not (/xml$/ =~ (@contentparams['type'] || 'xml'))
    # element declared itself as escaped markup, but it isn't really
    @contentparams['type'] = 'application/xhtml+xml'
  end
  if @incontent != 0 and @contentparams['type'] == 'application/xhtml+xml'
    tag = tag.split(':')[-1]
    handle_data("</#{tag}>", false)
  end

  # track xml:base and xml:lang going out of scope
  if @basestack and not @basestack.empty?
    @basestack.pop
    if @basestack and @basestack[-1] and not (@basestack.empty? or @basestack[-1].empty?)
      @baseuri = @basestack[-1]
    end
  end
  if @langstack and not @langstack.empty?
    @langstack.pop
    if @langstack and not @langstack.empty? # and @langstack[-1] and not @langstack.empty?
      @lang = @langstack[-1]
    end
  end
end
1487
+
1488
# LooseParserOnly
# Called for each character reference, e.g. for '&#160;', ref will be '160'.
#
# References to the five XML-special characters (", &, ', <, >) are kept as
# references; any other reference is turned into the character itself via
# unichr / uconvert. The text is appended to the innermost open element;
# nothing happens when no element is open.
def handle_charref(ref)
  $stderr << "entering handle_charref with #{ref}\n" if $debug
  return if @elementstack.nil? or @elementstack.empty?
  # Work on a lowercased copy: the caller's string is not modified, and a
  # frozen argument is acceptable.
  ref = ref.downcase
  chars = ['34', '38', '39', '60', '62', 'x22', 'x26', 'x27', 'x3c', 'x3e']
  if chars.include?(ref)
    text = "&##{ref};"
  else
    # a leading "x" marks a hexadecimal code point
    if ref[0..0] == 'x'
      c = (ref[1..-1]).to_i(16)
    else
      c = ref.to_i
    end
    text = uconvert(unichr(c), 'unicode')
  end
  @elementstack[-1][2] << text
end
1507
+
1508
# LooseParserOnly
# Called for each entity reference, e.g. for '&copy;', ref will be 'copy'.
#
# The five XML entities stay as references; everything else is run through
# HTMLEntities::decode_entities. The result is appended to the innermost
# open element; nothing happens when no element is open.
def handle_entityref(ref)
  return if @elementstack.nil? or @elementstack.empty?
  $stderr << "entering handle_entityref with #{ref}\n" if $debug
  xml_entities = ['lt', 'gt', 'quot', 'amp', 'apos']
  text = if xml_entities.include?(ref)
           "&#{ref};"
         else
           HTMLEntities::decode_entities("&#{ref};")
         end
  @elementstack[-1][2] << text
end
1522
+
1523
# Called for each block of plain text, i.e. outside of any tag and not
# containing any character or entity references.
#
# The text is appended to the innermost open element (nothing happens when
# none is open). When +escape+ is set and the current content type is
# application/xhtml+xml, the text is passed through to_xs first.
def handle_data(text, escape=true)
  return if @elementstack.nil? or @elementstack.empty?
  text = text.to_xs if escape and @contentparams['type'] == 'application/xhtml+xml'
  @elementstack[-1][2] << text
end
1532
+
1533
# Called for each comment, e.g. <!-- insert message here -->.
# Comments are ignored.
def handle_comment(comment); end
1536
+
1537
# Processing instructions are ignored.
def handle_pi(text); end
1539
+
1540
# Markup declarations are ignored.
def handle_decl(text); end
1542
+
1543
# for LooseFeedParser
#
# Handle the "<!" construct starting at offset +i+ of @rawdata and return
# the offset just past it.
#
# A CDATA section has its content (up to "]]>", or to the end of the data
# when unterminated) passed through to_xs and handed to handle_data with
# escaping off. Anything else is skipped up to the next ">"; when there is
# none the result is 1, because a nil index is coerced to 0.
def parse_declaration(i)
  $stderr << "entering parse_declaration\n" if $debug
  unless @rawdata[i...i+9] == '<![CDATA['
    gt = @rawdata.index(/>/, i).to_i
    return gt + 1
  end

  stop = @rawdata.index(/\]\]>/u, i + 9) || @rawdata.length
  handle_data(@rawdata[i+9...stop].to_xs, false)
  stop + 3
end
1556
+
1557
# Normalize a content type: lowercase it and expand the shorthands
# 'text', 'html' and 'xhtml' to their full MIME types. Any other value is
# returned lowercased.
#
# The argument is never modified (a lowercased copy is used), so frozen
# strings and values still stored elsewhere are safe to pass in.
def mapContentType(contentType)
  normalized = contentType.downcase
  case normalized
  when 'text'
    'text/plain'
  when 'html'
    'text/html'
  when 'xhtml'
    'application/xhtml+xml'
  else
    normalized
  end
end
1569
+
1570
# Record a namespace declaration (+prefix+ is nil for the default
# namespace).
#
# While no feed version is known yet, three namespace URIs identify it:
# the RSS 0.90 URI as default namespace, the RSS 1.0 URI and the Atom 1.0
# URI. Any URI containing "backend.userland.com/rss" is canonicalized.
# A URI found in @matchnamespaces maps the declared prefix to the standard
# one; other URIs are just recorded in @namespacesInUse.
def trackNamespace(prefix, uri)
  loweruri = uri.downcase.strip

  if @version.nil? or @version.empty?
    if prefix.nil? and loweruri == 'http://my.netscape.com/rdf/simple/0.9/'
      @version = 'rss090'
    elsif loweruri == 'http://purl.org/rss/1.0/'
      @version = 'rss10'
    elsif loweruri == 'http://www.w3.org/2005/atom'
      @version = 'atom10'
    end
  end

  if /backend\.userland\.com\/rss/ =~ loweruri
    # match any backend.userland.com namespace
    uri = 'http://backend.userland.com/rss'
    loweruri = uri
  end

  unless @matchnamespaces.has_key?(loweruri)
    return @namespacesInUse[prefix || ''] = uri
  end
  standard = @matchnamespaces[loweruri]
  @namespacemap[prefix] = standard
  @namespacesInUse[standard] = uri
end
1591
+
1592
# Resolve +uri+ against the current base URI ('' when none is set).
def resolveURI(uri)
  base = @baseuri || ''
  urljoin(base, uri)
end
1595
+
1596
# Identity hook used by pop: hands +data+ back unchanged, whatever the
# +element+.
def decodeEntities(element, data)
  data
end
1599
+
1600
# Open +element+: put an [element, expectingText, pieces] triple with an
# empty pieces list on top of the element stack.
def push(element, expectingText)
  @elementstack.push([element, expectingText, []])
end
1603
+
1604
# Close +element+ and return its accumulated text.
#
# Nothing happens (nil is returned) unless +element+ is the innermost open
# element. The collected pieces are joined and, unless +stripWhitespace+ is
# false, stripped. For elements that expect text the value is then
# base64-decoded, URI-resolved, sanitized and re-encoded as applicable and
# stored in the current entry or feed context.
def pop(element, stripWhitespace=true)
  return if @elementstack.nil? or @elementstack.empty?
  return if @elementstack[-1][0] != element
  element, expectingText, pieces = @elementstack.pop
  output = pieces.class == Array ? pieces.join('') : pieces
  output.strip! if stripWhitespace
  return output unless expectingText

  # decode base64 content
  if @contentparams['base64']
    decoded = Base64::decode64(output) # a.k.a. [output].unpack('m')[0]
    output = decoded unless output.empty? or decoded.empty?
  end

  # resolve relative URIs
  if @can_be_relative_uri.include?(element) and output and not output.empty?
    output = resolveURI(output)
  end

  # decode entities within embedded markup
  output = decodeEntities(element, output) unless @contentparams['base64']

  # remove temporary cruft from contentparams
  ['mode', 'base64'].each { |cruft| @contentparams.delete(cruft) }

  if @html_types.include?(mapContentType(@contentparams['type'] || 'text/html'))
    # resolve relative URIs within embedded markup
    if @can_contain_relative_uris.include?(element)
      output = FeedParser.resolveRelativeURIs(output, @baseuri, @encoding)
    end
    # sanitize embedded markup
    if @can_contain_dangerous_markup.include?(element)
      output = FeedParser.sanitizeHTML(output, @encoding)
    end
  end

  if @encoding and not @encoding.empty? and @encoding != 'utf-8'
    output = uconvert(output, @encoding, 'utf-8')
    # FIXME I turn everything into utf-8, not unicode, originally because REXML was being used but now because I haven't tested it out yet.
  end

  # categories/tags/keywords/whatever are handled in _end_category
  return output if element == 'category'

  # store output in appropriate place(s)
  if @inentry and not @insource
    entry = @entries[-1]
    case element
    when 'content'
      entry[element] ||= []
      detail = Marshal.load(Marshal.dump(@contentparams)) # deepcopy
      detail['value'] = output
      entry[element] << detail
    when 'link'
      entry[element] = output
      entry['links'][-1]['href'] = output if output and not output.empty?
    else
      element = 'summary' if element == 'description'
      entry[element] = output
      if @incontent != 0
        detail = Marshal.load(Marshal.dump(@contentparams))
        detail['value'] = output
        entry[element + '_detail'] = detail
      end
    end
  elsif (@infeed or @insource) and not @intextinput and not @inimage
    context = getContext()
    element = 'subtitle' if element == 'description'
    context[element] = output
    if element == 'link'
      context['links'][-1]['href'] = output
    elsif @incontent != 0
      detail = Marshal.load(Marshal.dump(@contentparams))
      detail['value'] = output
      context[element + '_detail'] = detail
    end
  end
  output
end
1696
+
1697
# Opens a content-bearing element: bumps the @incontent nesting counter,
# builds fresh @contentparams (type, language, base, base64) and pushes
# +tag+ onto the element stack.
def pushContent(tag, attrsD, defaultContentType, expectingText)
  @incontent += 1 # Yes, I hate this.
  content_type = mapContentType(attrsD['type'] || defaultContentType)
  @contentparams = FeedParserDict.new({'type' => content_type,'language' => @lang,'base' => @baseuri})
  @contentparams['base64'] = isBase64(attrsD, @contentparams)
  push(tag, expectingText)
end

# Closes a content-bearing element: pops +tag+, decrements @incontent,
# empties @contentparams and returns whatever pop produced.
def popContent(tag)
  popped = pop(tag)
  @incontent -= 1
  @contentparams.clear
  popped
end
1711
+
1712
# Rewrites the namespace prefix of a qualified name using @namespacemap.
#
# name - an element/attribute name, optionally of the form "prefix:suffix".
#
# Returns the name with its prefix replaced by the mapped one when a mapping
# exists; names without a colon are returned unchanged.
def mapToStandardPrefix(name)
  colonpos = name.index(':')
  return name unless colonpos

  # Exclusive range: with an inclusive 0..colonpos-1 a leading colon
  # (colonpos == 0) would yield 0..-1, i.e. the whole string, as the prefix.
  prefix = name[0...colonpos]
  suffix = name[(colonpos + 1)..-1]
  prefix = @namespacemap[prefix] || prefix
  prefix + ':' + suffix
end
1722
+
1723
# Looks up +name+ in +attrsD+ after normalising its namespace prefix with
# mapToStandardPrefix.
def getAttribute(attrsD, name)
  standard_name = mapToStandardPrefix(name)
  attrsD[standard_name]
end
1726
+
1727
# Decides whether element content should be treated as base64.
#
# Returns true when the element carries mode="base64"; otherwise false for
# types starting with "text/" or ending in "+xml" or "/xml", and true for
# everything else.
def isBase64(attrsD, contentparams)
  return true if attrsD['mode'] == 'base64'

  textual = /(^text\/)|(\+xml$)|(\/xml$)/ =~ contentparams['type']
  textual ? false : true
end
1734
+
1735
# Normalises the 'url' / 'uri' / 'href' attributes into a single 'href'.
# The first one present (in that order) wins; 'url' and 'uri' are removed.
# Mutates and returns +attrsD+.
def itsAnHrefDamnIt(attrsD)
  target = attrsD['url'] || attrsD['uri'] || attrsD['href']
  return attrsD unless target

  attrsD.delete('url')
  attrsD.delete('uri')
  attrsD['href'] = target
  attrsD
end
1744
+
1745
+
1746
# Stores +value+ under +key+ in the current context unless that key already
# holds a truthy value.
def _save(key, value)
  getContext()[key] ||= value
end
1750
+
1751
# Handles <rss>: derives @version from the element's version attribute,
# but only when no version has been recorded yet.
def _start_rss(attrsD)
  return unless @version.nil? || @version.empty?

  declared = attrsD['version'] || ''
  known = {
    '0.91' => 'rss091u',
    '0.92' => 'rss092',
    '0.93' => 'rss093',
    '0.94' => 'rss094'
  }[declared]

  @version =
    if known && !known.empty?
      known
    elsif /^2\./ =~ declared
      'rss20'
    else
      'rss'
    end
end
1770
+
1771
# Handles <dlhottitles>: marks the feed version as 'hotrss'.
def _start_dlhottitles(attrsD)
  @version = 'hotrss'
end

# Handles <channel> (and <feedinfo>): flags that we are inside the feed-level
# element and applies the shared lastmod/href attribute handling.
def _start_channel(attrsD)
  @infeed = true
  _cdf_common(attrsD)
end
alias _start_feedinfo _start_channel
1780
+
1781
# Replays the 'lastmod' and 'href' attributes as if they had been
# <modified> and <link> child elements: each one is opened, its text is
# written straight into the top stack frame, and it is closed again.
def _cdf_common(attrsD)
  if attrsD.has_key?('lastmod')
    _start_modified({})
    @elementstack[-1][-1] = attrsD['lastmod']
    _end_modified()
  end
  if attrsD.has_key?('href')
    _start_link({})
    @elementstack[-1][-1] = attrsD['href']
    _end_link()
  end
end
1793
+
1794
# Handles <feed>: flags that we are inside the feed-level element and, when
# no version has been recorded yet, derives @version from the version
# attribute ('0.1'/'0.2'/'0.3' map to 'atom01'/'atom02'/'atom03'; anything
# else becomes plain 'atom').
def _start_feed(attrsD)
  @infeed = true
  return unless @version.nil? || @version.empty?

  versionmap = {'0.1' => 'atom01',
                '0.2' => 'atom02',
                '0.3' => 'atom03'
               }
  # The previous code tested @version (known to be blank at this point)
  # instead of the looked-up value, so the map was never used.
  version = versionmap[attrsD['version']]
  @version = (version && !version.empty?) ? version : 'atom'
end
1811
+
1812
# Handles </channel> and </feed>: we are no longer inside the feed element.
def _end_channel
  @infeed = false
end
alias _end_feed _end_channel

# Handles <image>: sets @inimage, pushes the element (no text expected) and
# makes sure the current context has an 'image' dictionary.
def _start_image(attrsD)
  @inimage = true
  push('image', false)
  getContext()['image'] ||= FeedParserDict.new
end

# Handles </image>.
def _end_image
  pop('image')
  @inimage = false
end

# Handles <textinput>/<textInput>: sets @intextinput, pushes the element
# (no text expected) and makes sure the current context has a 'textinput'
# dictionary.
def _start_textinput(attrsD)
  @intextinput = true
  push('textinput', false)
  getContext()['textinput'] ||= FeedParserDict.new
end
alias _start_textInput _start_textinput

# Handles </textinput> and </textInput>.
def _end_textinput
  pop('textinput')
  @intextinput = false
end
alias _end_textInput _end_textinput
1842
+
1843
# Opens an author-like element (author, managingeditor, dc:author,
# dc:creator, itunes:author): sets @inauthor and pushes 'author' with text
# expected.
def _start_author(attrsD)
  @inauthor = true
  push('author', true)
end
alias _start_managingeditor _start_author
alias _start_dc_author _start_author
alias _start_dc_creator _start_author
alias _start_itunes_author _start_author

# Closes an author-like element, clears @inauthor and re-synchronises the
# author string with its detail dictionary.
def _end_author
  pop('author')
  @inauthor = false
  _sync_author_detail()
end
alias _end_managingeditor _end_author
alias _end_dc_author _end_author
alias _end_dc_creator _end_author
alias _end_itunes_author _end_author
1861
+
1862
# Handles <itunes:owner>: the owner is tracked as the 'publisher'.
def _start_itunes_owner(attrsD)
  @inpublisher = true
  push('publisher', false)
end

# Handles </itunes:owner> and synchronises the publisher string/detail.
def _end_itunes_owner
  pop('publisher')
  @inpublisher = false
  _sync_author_detail('publisher')
end
1872
+
1873
# Handles <contributor>: appends a fresh dictionary to the context's
# 'contributors' list and pushes the element (no text expected).
def _start_contributor(attrsD)
  @incontributor = true
  context = getContext()
  (context['contributors'] ||= []) << FeedParserDict.new
  push('contributor', false)
end

# Handles </contributor>.
def _end_contributor
  pop('contributor')
  @incontributor = false
end

# Handles <dc:contributor>: like <contributor>, but the element's own text
# is collected as the contributor's 'name'.
def _start_dc_contributor(attrsD)
  @incontributor = true
  context = getContext()
  (context['contributors'] ||= []) << FeedParserDict.new
  push('name', false)
end

# Handles </dc:contributor> by delegating to the name handler.
def _end_dc_contributor
  _end_name
  @incontributor = false
end
1898
+
1899
# Handles <name> / <itunes:name>.
def _start_name(attrsD)
  push('name', false)
end
alias _start_itunes_name _start_name

# Handles </name>: routes the collected text to the publisher, author,
# contributor or textinput, whichever enclosing element is active (checked
# in that order).
def _end_name
  value = pop('name')
  if @inpublisher
    _save_author('name', value, 'publisher')
  elsif @inauthor
    _save_author('name', value)
  elsif @incontributor
    _save_contributor('name', value)
  elsif @intextinput
    getContext()['textinput']['name'] = value
  end
end
alias _end_itunes_name _end_name
1918
+
1919
# Handles <width>.
def _start_width(attrsD)
  push('width', false)
end

# Handles </width>: stores the integer value on the image when inside one.
def _end_width
  width = pop('width').to_i
  getContext()['image']['width'] = width if @inimage
end

# Handles <height>.
def _start_height(attrsD)
  push('height', false)
end

# Handles </height>: stores the integer value on the image when inside one.
def _end_height
  height = pop('height').to_i
  getContext()['image']['height'] = height if @inimage
end
1942
+
1943
# Handles <url>, <homepage> and <uri>: collected under the name 'href'.
def _start_url(attrsD)
  push('href', true)
end
alias _start_homepage _start_url
alias _start_uri _start_url

# Closes <url>/<homepage>/<uri>: routes the value to the author,
# contributor, image ('href') or textinput ('link'), whichever enclosing
# element is active (checked in that order).
def _end_url
  value = pop('href')
  if @inauthor
    _save_author('href', value)
  elsif @incontributor
    _save_contributor('href', value)
  elsif @inimage
    getContext()['image']['href'] = value
  elsif @intextinput
    getContext()['textinput']['link'] = value
  end
end
alias _end_homepage _end_url
alias _end_uri _end_url
1965
+
1966
# Handles <email> / <itunes:email>.
def _start_email(attrsD)
  push('email', false)
end
alias _start_itunes_email _start_email

# Closes <email>: routes the value to the publisher, author or contributor,
# whichever enclosing element is active (checked in that order).
def _end_email
  value = pop('email')
  if @inpublisher
    _save_author('email', value, 'publisher')
  elsif @inauthor
    _save_author('email', value)
  elsif @incontributor
    _save_contributor('email', value)
  end
end
alias _end_itunes_email _end_email
1982
+
1983
# Returns the dictionary that currently receives parsed values:
# @sourcedata inside <source>, else the latest entry inside an entry,
# else @feeddata.
def getContext
  return @sourcedata if @insource
  return @entries[-1] if @inentry

  @feeddata
end
1993
+
1994
# Records +value+ under +key+ in the context's "<prefix>_detail" dictionary
# (created on demand), then runs _sync_author_detail with its default key.
def _save_author(key, value, prefix='author')
  detail_key = prefix + '_detail'
  context = getContext()
  context[detail_key] ||= FeedParserDict.new
  context[detail_key][key] = value
  _sync_author_detail()
end

# Records +value+ under +key+ on the most recent contributor, creating the
# 'contributors' list with one empty dictionary if it does not exist yet.
def _save_contributor(key, value)
  contributors = (getContext['contributors'] ||= [FeedParserDict.new])
  contributors[-1][key] = value
end
2006
+
2007
# Keeps context[key] (a display string) and context["<key>_detail"] (a
# dictionary with 'name' and 'email') consistent with each other.
#
# When the detail dictionary exists and is non-empty, the string is rebuilt
# from it: "name (email)", or just whichever of the two is present.
# Otherwise the string is parsed: the first e-mail address found becomes
# 'email' and the remaining text (minus surrounding parentheses) 'name'.
# Nothing is changed when there is no string or it holds no e-mail address.
def _sync_author_detail(key='author')
  context = getContext()
  detail_key = "#{key}_detail"
  detail = context[detail_key]
  if detail and not detail.empty?
    name = detail['name']
    email = detail['email']
    has_name = (name and not name.empty?)
    # The previous condition tested name.empty? twice and never looked at
    # the e-mail, producing "name ()" for an empty address.
    has_email = (email and not email.empty?)

    if has_name and has_email
      context[key] = "#{name} (#{email})"
    elsif has_name
      context[key] = name
    elsif has_email
      context[key] = email
    end
  else
    author = context[key].dup unless context[key].nil?
    return if not author or author.empty?
    emailmatch = author.match(/(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))/)
    # No address in the string: leave the context alone instead of raising
    # NoMethodError on nil.
    return unless emailmatch
    email = emailmatch[1]
    author.gsub!(email, '')
    author.gsub!('()', '')
    author.strip!
    author.gsub!(/^\(/,'')
    author.gsub!(/\)$/,'')
    author.strip!
    context[detail_key] ||= FeedParserDict.new
    context[detail_key]['name'] = author
    context[detail_key]['email'] = email
  end
end
2037
+
2038
# Opens <subtitle> / <tagline> / <itunes:subtitle> as plain-text content.
def _start_subtitle(attrsD)
  pushContent('subtitle', attrsD, 'text/plain', true)
end
alias _start_tagline _start_subtitle
alias _start_itunes_subtitle _start_subtitle

# Closes <subtitle> and its aliases.
def _end_subtitle
  popContent('subtitle')
end
alias _end_tagline _end_subtitle
alias _end_itunes_subtitle _end_subtitle

# Opens <rights> / <dc:rights> / <copyright> as plain-text content.
def _start_rights(attrsD)
  pushContent('rights', attrsD, 'text/plain', true)
end
alias _start_dc_rights _start_rights
alias _start_copyright _start_rights

# Closes <rights> and its aliases.
def _end_rights
  popContent('rights')
end
alias _end_dc_rights _end_rights
alias _end_copyright _end_rights
2061
+
2062
# Opens <item> / <entry> / <product>: appends a new entry dictionary, resets
# per-entry flags, copies a non-empty rdf:about attribute into the entry's
# 'id' and applies the shared lastmod/href attribute handling.
def _start_item(attrsD)
  @entries << FeedParserDict.new
  push('item', false)
  @inentry = true
  @guidislink = false
  about = getAttribute(attrsD, 'rdf:about')
  getContext()['id'] = about if about && !about.empty?
  _cdf_common(attrsD)
end
alias _start_entry _start_item
alias _start_product _start_item

# Closes <item> / <entry>.
def _end_item
  pop('item')
  @inentry = false
end
alias _end_entry _end_item
2082
+
2083
# Opens <dc:language> / <language>.
def _start_dc_language(attrsD)
  push('language', true)
end
alias _start_language _start_dc_language

# Closes <dc:language> / <language>; the value becomes the current @lang.
def _end_dc_language
  @lang = pop('language')
end
alias _end_language _end_dc_language

# Opens <dc:publisher> / <webmaster>.
def _start_dc_publisher(attrsD)
  push('publisher', true)
end
alias _start_webmaster _start_dc_publisher

# Closes <dc:publisher> / <webmaster> and synchronises publisher details.
def _end_dc_publisher
  pop('publisher')
  _sync_author_detail('publisher')
end
alias _end_webmaster _end_dc_publisher
2103
+
2104
# Opens <published> / <dcterms:issued> / <issued>.
def _start_published(attrsD)
  push('published', true)
end
alias _start_dcterms_issued _start_published
alias _start_issued _start_published

# Closes <published>; the parsed date is saved as 'published_parsed'.
def _end_published
  _save('published_parsed', parse_date(pop('published')))
end
alias _end_dcterms_issued _end_published
alias _end_issued _end_published

# Opens <updated> / <modified> / <dcterms:modified> / <pubdate> / <dc:date>.
def _start_updated(attrsD)
  push('updated', true)
end
alias _start_modified _start_updated
alias _start_dcterms_modified _start_updated
alias _start_pubdate _start_updated
alias _start_dc_date _start_updated

# Closes <updated>; the parsed date is saved as 'updated_parsed'.
def _end_updated
  _save('updated_parsed', parse_date(pop('updated')))
end
alias _end_modified _end_updated
alias _end_dcterms_modified _end_updated
alias _end_pubdate _end_updated
alias _end_dc_date _end_updated

# Opens <created> / <dcterms:created>.
def _start_created(attrsD)
  push('created', true)
end
alias _start_dcterms_created _start_created

# Closes <created>; the parsed date is saved as 'created_parsed'.
def _end_created
  _save('created_parsed', parse_date(pop('created')))
end
alias _end_dcterms_created _end_created

# Opens <expirationdate>, collected under the name 'expired'.
def _start_expirationdate(attrsD)
  push('expired', true)
end

# Closes <expirationdate>; the parsed date is saved as 'expired_parsed'.
def _end_expirationdate
  _save('expired_parsed', parse_date(pop('expired')))
end
2151
+
2152
# Handles <cc:license>: when the element carries a non-empty rdf:resource
# attribute, that value is used as the element's text and the element is
# closed immediately.
def _start_cc_license(attrsD)
  push('license', true)
  value = getAttribute(attrsD, 'rdf:resource')
  if value and not value.empty?
    # Use the instance variable, as every other handler does; a bare
    # `elementstack` is a method call that nothing in view defines.
    @elementstack[-1][2] << value
    pop('license')
  end
end
2160
+
2161
# Opens <creativeCommons:license>; its text is collected as 'license'.
def _start_creativecommons_license(attrsD)
  push('license', true)
end

# Closes <creativeCommons:license>.
def _end_creativecommons_license
  pop('license')
end
2168
+
2169
# Adds a term/scheme/label tag to the current context's 'tags' list (created
# on demand). Nothing is added when all three values are nil or empty, or
# when an equal tag is already present.
def addTag(term, scheme, label)
  context = getContext()
  context['tags'] ||= []
  tags = context['tags']
  blank = lambda { |v| v.nil? || v.empty? }
  return if blank.call(term) && blank.call(scheme) && blank.call(label)

  value = FeedParserDict.new({'term' => term, 'scheme' => scheme, 'label' => label})
  unless tags.include?(value)
    context['tags'] << FeedParserDict.new({'term' => term, 'scheme' => scheme, 'label' => label})
  end
end
2181
+
2182
# Opens <category> / <dc:subject> / <keywords>: registers a tag built from
# the term, scheme (or domain) and label attributes, then pushes 'category'
# so the element text can be collected.
def _start_category(attrsD)
  $stderr << "entering _start_category with #{attrsD}\n" if $debug

  addTag(attrsD['term'], attrsD['scheme'] || attrsD['domain'], attrsD['label'])
  push('category', true)
end
alias _start_dc_subject _start_category
alias _start_keywords _start_category
2193
+
2194
# Closes <itunes:keywords>: every whitespace-separated keyword becomes a tag
# in the iTunes scheme.
def _end_itunes_keywords
  keywords = pop('itunes_keywords')
  # pop returns nil when the stack is empty or its top frame belongs to
  # another element; there is then nothing to split.
  return if keywords.nil?

  keywords.split.each do |term|
    addTag(term, 'http://www.itunes.com/', nil)
  end
end
2199
+
2200
# Opens <itunes:category>: its 'text' attribute is registered as a tag in
# the iTunes scheme and the element is pushed as 'category'.
def _start_itunes_category(attrsD)
  category_text = attrsD['text']
  addTag(category_text, 'http://www.itunes.com/', nil)
  push('category', true)
end
2204
+
2205
# Closes <category> / <dc:subject> / <keywords> / <itunes:category>.
#
# A non-empty element text either fills in the 'term' of the most recent tag
# (when that tag has none yet) or is added as a new tag of its own.
def _end_category
  value = pop('category')
  return if value.nil? or value.empty?

  tags = getContext()['tags']
  # No trailing ':' after the condition: that Ruby 1.8 shorthand for `then`
  # is a syntax error from 1.9 on. A missing tag list is treated like an
  # empty one.
  if tags and not tags.empty? and not tags[-1]['term']
    tags[-1]['term'] = value
  else
    addTag(value, nil, nil)
  end
end
alias :_end_dc_subject :_end_category
alias :_end_keywords :_end_category
alias :_end_itunes_category :_end_category
2219
+
2220
# Handles <cloud>: stores a copy of its attributes as the context's 'cloud'.
def _start_cloud(attrsD)
  context = getContext()
  context['cloud'] = FeedParserDict.new(attrsD)
end
2223
+
2224
+ def _start_link(attrsD)
2225
+ attrsD['rel'] ||= 'alternate'
2226
+ attrsD['type'] ||= 'text/html'
2227
+ attrsD = itsAnHrefDamnIt(attrsD)
2228
+ if attrsD.has_key? 'href'
2229
+ attrsD['href'] = resolveURI(attrsD['href'])
2230
+ end
2231
+ expectingText = @infeed || @inentry || @insource
2232
+ context = getContext()
2233
+ context['links'] ||= []
2234
+ context['links'] << FeedParserDict.new(attrsD)
2235
+ if attrsD['rel'] == 'enclosure'
2236
+ _start_enclosure(attrsD)
2237
+ end
2238
+ if attrsD.has_key? 'href'
2239
+ expectingText = false
2240
+ if (attrsD['rel'] == 'alternate') and @html_types.include?mapContentType(attrsD['type'])
2241
+ context['link'] = attrsD['href']
2242
+ end
2243
+ else
2244
+ push('link', expectingText)
2245
+ end
2246
+ end
2247
+ alias :_start_producturl :_start_link
2248
+
2249
# Closes <link> / <producturl>: additionally copies the value onto the
# textinput and/or image dictionaries when inside those elements.
def _end_link
  value = pop('link')
  context = getContext()
  context['textinput']['link'] = value if @intextinput
  context['image']['link'] = value if @inimage
end
alias _end_producturl _end_link
2260
+
2261
# Opens <guid>: the guid doubles as a link unless the ispermalink attribute
# is present with a value other than 'true'. The text is collected as 'id'.
def _start_guid(attrsD)
  permalink_flag = attrsD['ispermalink'] || 'true'
  @guidislink = (permalink_flag == 'true')
  push('id', true)
end
2265
+
2266
# Closes <guid>: records 'guidislink' and, when the guid acts as a
# permalink, offers its value as the entry's link.
def _end_guid
  value = pop('id')
  _save('guidislink', (@guidislink and not getContext().has_key?('link')))
  # No trailing ':' after the condition: that Ruby 1.8 shorthand for `then`
  # is a syntax error from 1.9 on.
  if @guidislink
    # guid acts as link, but only if 'ispermalink' is not present or is 'true',
    # and only if the item doesn't already have a link element
    _save('link', value)
  end
end
2275
+
2276
+
2277
# Opens <title> / <dc:title> / <media:title> as plain-text content; text is
# only expected while inside a feed, entry or source.
def _start_title(attrsD)
  expecting_text = @infeed || @inentry || @insource
  pushContent('title', attrsD, 'text/plain', expecting_text)
end
alias _start_dc_title _start_title
alias _start_media_title _start_title

# Closes <title>: additionally copies the value onto the textinput or image
# dictionary when inside one of those (textinput takes precedence).
def _end_title
  value = popContent('title')
  context = getContext()
  if @intextinput
    context['textinput']['title'] = value
  elsif @inimage
    context['image']['title'] = value
  end
end
alias _end_dc_title _end_title
alias _end_media_title _end_title
2294
+
2295
# Opens <description>. When the context already has a 'summary', the
# description is treated as content instead (and @summaryKey remembers
# that); otherwise it is pushed as HTML content named 'description'.
def _start_description(attrsD)
  if getContext().has_key?('summary')
    @summaryKey = 'content'
    _start_content(attrsD)
  else
    expecting_text = @infeed || @inentry || @insource
    pushContent('description', attrsD, 'text/html', expecting_text)
  end
end

# Opens <abstract>: pushed as plain-text content named 'description'.
def _start_abstract(attrsD)
  expecting_text = @infeed || @inentry || @insource
  pushContent('description', attrsD, 'text/plain', expecting_text)
end
2308
+
2309
# Closes <description> / <abstract>.
#
# When the opening handler redirected the element to content
# (@summaryKey == 'content') the content handler closes it; otherwise the
# value is popped and, inside a textinput or image, also stored as that
# dictionary's 'description'. @summaryKey is always reset afterwards.
def _end_description
  if @summaryKey == 'content'
    _end_content()
  else
    value = popContent('description')
    context = getContext()
    if @intextinput
      context['textinput']['description'] = value
    # No trailing ':' after the condition: that Ruby 1.8 shorthand for
    # `then` is a syntax error from 1.9 on.
    elsif @inimage
      context['image']['description'] = value
    end
  end
  @summaryKey = nil
end
alias :_end_abstract :_end_description
2324
+
2325
# Opens <info> / <feedburner:browserFriendly> as plain-text content.
def _start_info(attrsD)
  pushContent('info', attrsD, 'text/plain', true)
end
alias _start_feedburner_browserfriendly _start_info

# Closes <info> / <feedburner:browserFriendly>.
def _end_info
  popContent('info')
end
alias _end_feedburner_browserfriendly _end_info
2334
+
2335
# Opens <generator>: normalises and resolves the URL attribute (if any),
# stores the attributes as 'generator_detail' and pushes the element.
def _start_generator(attrsD)
  if attrsD && !attrsD.empty?
    attrsD = itsAnHrefDamnIt(attrsD)
    attrsD['href'] = resolveURI(attrsD['href']) if attrsD.has_key?('href')
  end
  getContext()['generator_detail'] = FeedParserDict.new(attrsD)
  push('generator', true)
end

# Closes <generator>: the element text becomes the detail's 'name'.
def _end_generator
  value = pop('generator')
  context = getContext()
  context['generator_detail']['name'] = value if context.has_key?('generator_detail')
end
2353
+
2354
# Handles <admin:generatorAgent>: the rdf:resource attribute is used as the
# element's text, the element is closed immediately, and 'generator_detail'
# is set to a dictionary holding that value as 'href'.
def _start_admin_generatoragent(attrsD)
  push('generator', true)
  value = getAttribute(attrsD, 'rdf:resource')
  if value and not value.empty?
    # Use the instance variable, as _start_admin_errorreportsto does; a bare
    # `elementstack` is a method call that nothing in view defines.
    @elementstack[-1][2] << value
  end
  pop('generator')
  getContext()['generator_detail'] = FeedParserDict.new({'href' => value})
end
2363
+
2364
# Handles <admin:errorReportsTo>: a non-empty rdf:resource attribute is used
# as the element's text, and the element is closed immediately.
def _start_admin_errorreportsto(attrsD)
  push('errorreportsto', true)
  resource = getAttribute(attrsD, 'rdf:resource')
  @elementstack[-1][2] << resource if resource && !resource.empty?
  pop('errorreportsto')
end
2372
+
2373
# Opens <summary> / <itunes:summary>. A second summary in the same context
# is treated as content; the first one is pushed as plain-text content named
# 'summary'. @summaryKey records which route was taken.
def _start_summary(attrsD)
  if getContext().has_key?('summary')
    @summaryKey = 'content'
    _start_content(attrsD)
  else
    @summaryKey = 'summary'
    pushContent(@summaryKey, attrsD, 'text/plain', true)
  end
end
alias _start_itunes_summary _start_summary
2384
+
2385
# Closes <summary> / <itunes:summary> via the route chosen when it was
# opened (content handler, or popContent on @summaryKey, defaulting to
# 'summary'), then resets @summaryKey.
def _end_summary
  # No trailing ':' after the condition: that Ruby 1.8 shorthand for `then`
  # is a syntax error from 1.9 on.
  if @summaryKey == 'content'
    _end_content()
  else
    popContent(@summaryKey || 'summary')
  end
  @summaryKey = nil
end
alias :_end_itunes_summary :_end_summary
2394
+
2395
# Handles <enclosure>: normalises the URL attribute to 'href', appends the
# attributes to the context's 'enclosures' and, when the context has no
# 'id' yet, uses a non-empty href as the id.
def _start_enclosure(attrsD)
  attrsD = itsAnHrefDamnIt(attrsD)
  context = getContext()
  context['enclosures'] ||= []
  context['enclosures'] << FeedParserDict.new(attrsD)

  href = attrsD['href']
  return unless href && !href.empty?

  context['id'] = href unless context['id']
end
2407
+
2408
# Opens <source>: subsequent values are collected into @sourcedata.
def _start_source(attrsD)
  @insource = true
end

# Closes <source>: a deep copy of the collected data becomes the context's
# 'source', and @sourcedata is emptied for the next one.
def _end_source
  @insource = false
  snapshot = Marshal.load(Marshal.dump(@sourcedata))
  getContext()['source'] = snapshot
  @sourcedata.clear()
end
2417
+
2418
# Opens <content> as plain-text content, records a non-empty src attribute
# in @contentparams and pushes 'content' a second time (in addition to the
# push done by pushContent).
def _start_content(attrsD)
  pushContent('content', attrsD, 'text/plain', true)
  src = attrsD['src']
  # No trailing ':' after the condition: that Ruby 1.8 shorthand for `then`
  # is a syntax error from 1.9 on.
  if src and not src.empty?
    @contentparams['src'] = src
  end
  push('content', true)
end
2426
+
2427
# Opens <prodlink> as HTML content.
def _start_prodlink(attrsD)
  pushContent('content', attrsD, 'text/html', true)
end

# Opens <body> / <xhtml:body> as XHTML content.
def _start_body(attrsD)
  pushContent('content', attrsD, 'application/xhtml+xml', true)
end
alias _start_xhtml_body _start_body

# Opens <content:encoded> / <fullitem> as HTML content.
def _start_content_encoded(attrsD)
  pushContent('content', attrsD, 'text/html', true)
end
alias _start_fullitem _start_content_encoded
2440
+
2441
# Closes a content element (<content>, <body>, <xhtml:body>,
# <content:encoded>, <fullitem>, <prodlink>).
#
# Plain-text and HTML content is additionally offered as the context's
# 'description' (only stored if none is set yet, per _save).
def _end_content
  copyToDescription = (['text/plain'] + @html_types).include? mapContentType(@contentparams['type'])
  value = popContent('content')
  if copyToDescription
    _save('description', value)
  end
end
# These aliases used to sit inside the method body, before its `end`, so
# they did not exist until _end_content had been called once.
alias :_end_body :_end_content
alias :_end_xhtml_body :_end_content
alias :_end_content_encoded :_end_content
alias :_end_fullitem :_end_content
alias :_end_prodlink :_end_content
2453
+
2454
# Handles <itunes:image> / <itunes:link>: pushes 'itunes_image' and replaces
# the context's 'image' with a dictionary holding the href attribute.
def _start_itunes_image(attrsD)
  push('itunes_image', false)
  image = FeedParserDict.new({'href' => attrsD['href']})
  getContext()['image'] = image
end
alias _start_itunes_link _start_itunes_image
2459
+
2460
# Closes <itunes:block>: stores true when the (unstripped) text is exactly
# 'yes', false otherwise.
def _end_itunes_block
  flag = pop('itunes_block', false)
  getContext()['itunes_block'] = (flag == 'yes')
end

# Closes <itunes:explicit>: stores true when the (unstripped) text is
# exactly 'yes', false otherwise.
def _end_itunes_explicit
  flag = pop('itunes_explicit', false)
  getContext()['itunes_explicit'] = (flag == 'yes')
end
2469
+
2470
+
2471
# ISO-8601 date parsing routines written by Fazal Majid.
# The ISO 8601 standard is very convoluted and irregular - a full ISO 8601
# parser is beyond the scope of feedparser and the current Time.iso8601
# method does not work.
# A single regular expression cannot parse ISO 8601 date formats into groups
# as the standard is highly irregular (for instance is 030104 2003-01-04 or
# 0301-04-01), so we use templates instead.
# Please note the order in templates is significant because we need a
# greedy match.
#
# Parses a variety of ISO-8601-compatible formats like 20040105.
#
# dateString - the text to parse.
#
# Returns a UTC Time, or nil when no template matches or the matched fields
# do not form a valid date.
def _parse_date_iso8601(dateString)
  # Each template pairs a date pattern with the names of its capture groups,
  # in order. Templates are tried top to bottom; the first match wins.
  # FIXME The century regexp may not work ('\d\d$' says "two digits at end
  # of line" but the time pattern is then appended to it).
  templates = [
    # The day separator is optional ('-?'), as it already is in the
    # two-digit-year template; with a mandatory '-' compact dates such as
    # 20040105 fell through to the ordinal template.
    ['^(\d{4})-?([01]\d)-?([0123]\d)', ['year', 'month', 'day']],
    ['^(\d{4})-([01]\d)',              ['year', 'month']],
    ['^(\d{4})-?([0123]\d\d)',         ['year', 'ordinal']],
    ['^(\d\d)-?([01]\d)-?([0123]\d)',  ['year', 'month', 'day']],
    ['^(\d\d)-?([0123]\d\d)',          ['year', 'ordinal']],
    ['^(\d{4})',                       ['year']],
    ['-(\d\d)-?([01]\d)',              ['year', 'month']],
    ['-([0123]\d\d)',                  ['ordinal']],
    ['-(\d\d)',                        ['year']],
    ['--([01]\d)-?([0123]\d)',         ['month', 'day']],
    ['--([01]\d)',                     ['month']],
    ['---([0123]\d)',                  ['day']],
    ['(\d\d$)',                        ['century']],
    ['',                               []]
  ]
  # Optional time part appended to every template. Every group that is not
  # listed in time_fields is non-capturing ('(?:'), INCLUDING the outermost
  # one: when that group captured, all six fields were shifted by one
  # position (hour received the whole time string, tz the seconds, ...).
  time_suffix = '(?:T?(\d\d):(\d\d)(?::(\d\d))?([+-](\d\d)(?::(\d\d))?|Z)?)?'
  time_fields = ['hour', 'minute', 'second', 'tz', 'tzhour', 'tzmin']

  m = nil
  param_keys = []
  templates.each do |pattern, fields|
    $stderr << "Trying iso8601 regexp: #{pattern + time_suffix}\n" if $debug
    param_keys = fields + time_fields
    m = dateString.match(Regexp.new(pattern + time_suffix))
    break if m
  end
  # The empty template always matches; an empty match at position 0 means
  # nothing was recognised.
  return if m.nil? or (m.begin(0).zero? and m.end(0).zero?)

  params = {}
  param_keys.each_with_index do |key, i|
    params[key] = m[i + 1]
  end

  ordinal = params['ordinal'].to_i unless params['ordinal'].nil?
  year = params['year']
  if year.nil? or year.empty?
    year = Time.now.utc.year
  elsif year.length == 2
    # ISO 8601 assumes current century, i.e. 93 -> 2093, NOT 1993
    year = 100 * (Time.now.utc.year / 100) + year.to_i
  else
    year = year.to_i
  end

  month = params['month']
  if month.nil? or month.empty?
    # an ordinal (day of year) determines the month; otherwise default to
    # the current month
    month = ordinal ? DateTime.ordinal(year, ordinal).month : Time.now.utc.month
  end
  month = month.to_i

  day = params['day']
  if day.nil? or day.empty?
    # see above
    if ordinal
      day = DateTime.ordinal(year, ordinal).day
    elsif params['century'] or params['year'] or params['month']
      day = 1
    else
      day = Time.now.utc.day
    end
  else
    day = day.to_i
  end
  # special case of the century - is the first year of the 21st century
  # 2000 or 2001 ? The debate goes on...
  if params.has_key? 'century'
    year = (params['century'].to_i - 1) * 100 + 1
  end
  # in ISO 8601 most fields are optional
  hour = params['hour'].to_i
  minute = params['minute'].to_i
  second = params['second'].to_i

  result = Time.utc(year, month, day, hour, minute, second)
  tz = params['tz']
  if tz and not tz.empty? and tz != 'Z'
    # Convert local time to UTC by shifting the finished Time by the offset.
    # The old code added the offset hours/minutes to the DAY and MONTH slots
    # of the time array. Time arithmetic also handles crossing midnight.
    offset = params['tzhour'].to_i * 3600 + params['tzmin'].to_i * 60
    case tz[0, 1]
    when '-' then result += offset
    when '+' then result -= offset
    else return nil
    end
  end
  result
rescue ArgumentError
  # Out-of-range fields (e.g. month 13, ordinal 400): not a date we can
  # represent.
  nil
end
2604
+
2605
# Parse a string according to the OnBlog 8-bit date format
# (8-bit date handling routines written by ytrewq1).
#
# The matched fields are reassembled as a W3DTF string with a fixed +09:00
# offset and handed to _parse_date_w3dtf. Returns nil when the string does
# not match.
def _parse_date_onblog(dateString)
  year_mark  = u("년") # b3e2 in euc-kr
  month_mark = u("월") # bff9 in euc-kr
  day_mark   = u("일") # c0cf in euc-kr

  pattern = /(\d{4})#{year_mark}\s+(\d{2})#{month_mark}\s+(\d{2})#{day_mark}\s+(\d{2}):(\d{2}):(\d{2})/
  m = pattern.match(dateString)
  return unless m

  year, month, day, hour, minute, second = m.captures
  w3dtfdate = "#{year}-#{month}-#{day}T#{hour}:#{minute}:#{second}+09:00"

  $stderr << "OnBlog date parsed as: %s\n" % w3dtfdate if $debug
  _parse_date_w3dtf(w3dtfdate)
end
2623
+
2624
# Parse a string according to the Nate 8-bit date format
# (8-bit date handling routines written by ytrewq1).
#
# The 12-hour clock with a Korean AM/PM marker is converted to 24-hour
# time, reassembled as a W3DTF string with a fixed +09:00 offset and handed
# to _parse_date_w3dtf. Returns nil when the string does not match.
def _parse_date_nate(dateString)
  korean_am = u("오전") # bfc0 c0fc in euc-kr
  korean_pm = u("오후") # bfc0 c8c4 in euc-kr

  korean_nate_date_re = /(\d{4})-(\d{2})-(\d{2})\s+(#{korean_am}|#{korean_pm})\s+(\d{0,2}):(\d{0,2}):(\d{0,2})/
  m = korean_nate_date_re.match(dateString)
  return unless m
  hour = m[5].to_i
  if m[4] == korean_pm
    # 12 PM is noon and must stay 12; adding 12 unconditionally produced the
    # invalid hour 24.
    hour += 12 if hour < 12
  elsif hour == 12
    # 12 AM is midnight.
    hour = 0
  end
  hour = hour.to_s.rjust(2,'0')
  w3dtfdate = "#{m[1]}-#{m[2]}-#{m[3]}T#{hour}:#{m[6]}:#{m[7]}+09:00"
  $stderr << "Nate date parsed as: %s\n" % w3dtfdate if $debug
  return _parse_date_w3dtf(w3dtfdate)
end
2643
+
2644
# Parse a string in the MS SQL "YYYY-MM-DD HH:MM:SS[.fraction]" format.
#
# The fraction is discarded; the remaining fields are reassembled as a W3DTF
# string with a fixed +09:00 offset and handed to _parse_date_w3dtf.
# Returns nil when the string does not match.
def _parse_date_mssql(dateString)
  m = /(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})(\.\d+)?/.match(dateString)
  return unless m

  date_part = [m[1], m[2], m[3]].join('-')
  time_part = [m[4], m[5], m[6]].join(':')
  w3dtfdate = "#{date_part}T#{time_part}+09:00"
  $stderr << "MS SQL date parsed as: %s\n" % w3dtfdate if $debug
  _parse_date_w3dtf(w3dtfdate)
end
2653
+
2654
# Parse a string according to a Greek 8-bit date format.
#
# The Greek weekday and month abbreviations are translated to their English
# equivalents and the resulting RFC 822 string is handed to
# _parse_date_rfc822. Returns nil when the string does not match the layout
# or uses an unknown weekday or month abbreviation.
def _parse_date_greek(dateString)
  # Unicode strings for Greek date strings
  greek_months = {
    u("Ιαν") => u("Jan"),  # c9e1ed in iso-8859-7
    u("Φεβ") => u("Feb"),  # d6e5e2 in iso-8859-7
    u("Μάώ") => u("Mar"),  # ccdcfe in iso-8859-7
    u("Μαώ") => u("Mar"),  # cce1fe in iso-8859-7
    u("Απρ") => u("Apr"),  # c1f0f1 in iso-8859-7
    u("Μάι") => u("May"),  # ccdce9 in iso-8859-7
    u("Μαϊ") => u("May"),  # cce1fa in iso-8859-7
    u("Μαι") => u("May"),  # cce1e9 in iso-8859-7
    u("Ιούν") => u("Jun"), # c9effded in iso-8859-7
    u("Ιον") => u("Jun"),  # c9efed in iso-8859-7
    u("Ιούλ") => u("Jul"), # c9effdeb in iso-8859-7
    u("Ιολ") => u("Jul"),  # c9f9eb in iso-8859-7
    u("Αύγ") => u("Aug"),  # c1fde3 in iso-8859-7
    u("Αυγ") => u("Aug"),  # c1f5e3 in iso-8859-7
    u("Σεπ") => u("Sep"),  # d3e5f0 in iso-8859-7
    u("Οκτ") => u("Oct"),  # cfeaf4 in iso-8859-7
    u("Νοέ") => u("Nov"),  # cdefdd in iso-8859-7
    u("Νοε") => u("Nov"),  # cdefe5 in iso-8859-7
    u("Δεκ") => u("Dec"),  # c4e5ea in iso-8859-7
  }

  greek_wdays = {
    u("Κυρ") => u("Sun"), # caf5f1 in iso-8859-7
    u("Δευ") => u("Mon"), # c4e5f5 in iso-8859-7
    u("Τρι") => u("Tue"), # d4f1e9 in iso-8859-7
    u("Τετ") => u("Wed"), # d4e5f4 in iso-8859-7
    u("Πεμ") => u("Thu"), # d0e5ec in iso-8859-7
    u("Παρ") => u("Fri"), # d0e1f1 in iso-8859-7
    u("Σαβ") => u("Sat"), # d3e1e2 in iso-8859-7
  }

  greek_date_format = /([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)/

  m = greek_date_format.match(dateString)
  return unless m
  # Hash#[] returns nil for an unknown key instead of raising, so the
  # former begin/rescue never triggered; check the lookups explicitly.
  wday = greek_wdays[m[1]]
  month = greek_months[m[3]]
  return nil if wday.nil? or month.nil?

  rfc822date = "#{wday}, #{m[2]} #{month} #{m[4]} #{m[5]}:#{m[6]}:#{m[7]} #{m[8]}"
  $stderr << "Greek date parsed as: #{rfc822date}\n" if $debug
  return _parse_date_rfc822(rfc822date)
end
2703
+
2704
# Parse a string according to a Hungarian 8-bit date format
# ("YYYY-<month name>-DDTHH:MM+ZZ:ZZ").
#
# The month name is mapped to its two-digit number, day and hour are
# zero-padded, seconds are set to 00, and the resulting W3DTF string is handed
# to _parse_date_w3dtf.
#
# Returns nil when the layout does not match or the month name is unknown.
# (A Hash lookup returns nil instead of raising, so the unknown-month case is
# checked explicitly; otherwise a malformed "YYYY--DD..." string would be built.)
def _parse_date_hungarian(dateString)
  hungarian_date_format_re = /(\d{4})-([^-]+)-(\d{0,2})T(\d{0,2}):(\d{2})((\+|-)(\d{0,2}:\d{2}))/
  m = hungarian_date_format_re.match(dateString)
  return unless m

  # Unicode strings for Hungarian date strings
  hungarian_months = {
    u("január") => u("01"),   # e1 in iso-8859-2
    u("februári") => u("02"), # e1 in iso-8859-2
    u("március") => u("03"),  # e1 in iso-8859-2
    u("április") => u("04"),  # e1 in iso-8859-2
    u("máujus") => u("05"),   # e1 in iso-8859-2
    u("június") => u("06"),   # fa in iso-8859-2
    u("július") => u("07"),   # fa in iso-8859-2
    u("augusztus") => u("08"),
    u("szeptember") => u("09"),
    u("október") => u("10"),  # f3 in iso-8859-2
    u("november") => u("11"),
    u("december") => u("12"),
  }

  month = hungarian_months[m[2]]
  return unless month

  day = m[3].rjust(2, '0')
  hour = m[4].rjust(2, '0')

  w3dtfdate = "#{m[1]}-#{month}-#{day}T#{hour}:#{m[5]}:00#{m[6]}"
  $stderr << "Hungarian date parsed as: #{w3dtfdate}\n" if $debug
  return _parse_date_w3dtf(w3dtfdate)
end
2737
+
2738
# Split +num+ into [remainder, carry] with respect to +modulus+,
# e.g. 61 seconds with modulus 60 gives [1, 1].
def rollover(num, modulus)
  remainder = num % modulus
  carry = num / modulus
  [remainder, carry]
end
2741
+
2742
# Return num / modulus, unless that quotient is zero, in which case
# +num+ itself is returned unchanged.
def set_self(num, modulus)
  quotient = num / modulus
  quotient == 0 ? num : quotient
end
2749
+ # W3DTF-style date parsing
2750
+ # FIXME shouldn't it be "W3CDTF"?
2751
# Parse a W3DTF-style timestamp ("YYYY[-MM[-DD[Thh:mm:ss(Z|+hh:mm|-hh:mm)]]]")
# into a UTC Time.
#
# Ruby's own Time parsers are not used because they reject out-of-range
# fields; here 61 seconds, 61 minutes, 25 hours, months above 12 and days past
# the end of the month are rolled over into the next larger unit instead.
# Missing (or zero) year/month/day default to 1, missing time fields to 0.
# Raises NoMethodError when the string does not start with a four-digit year.
def _parse_date_w3dtf(dateString)
  m = dateString.match(/^(\d{4})-?(?:(?:([01]\d)-?(?:([0123]\d)(?:T(\d\d):(\d\d):(\d\d)([+-]\d\d:\d\d|Z))?)?)?)?/)

  # Year, month and day: integers, with nil/zero replaced by 1.
  year, month, day = m[1..3].map do |field|
    number = field.to_i
    number == 0 ? 1 : number
  end
  # Hour, minute and second: integers (nil becomes 0).
  hour, minute, second = m[4..6].map { |field| field.to_i }
  # The timezone stays a String (or nil when absent).
  zone = m[-1]

  # Roll surplus seconds into minutes, minutes into hours, hours into days.
  second, carry = rollover(second, 60)
  minute += carry
  minute, carry = rollover(minute, 60)
  hour += carry
  hour, carry = rollover(hour, 24)
  day = day + carry

  if month > 12
    month, carry = rollover(month, 12)
    month = 12 if month == 0
    year += carry
  end

  # Push days beyond the end of the month into the following month(s).
  num_days = Time.days_in_month(month, year)
  while day > num_days
    day -= num_days
    month += 1
    if month > 12
      year += 1
      month = set_self(month, 12)
    end
    num_days = Time.days_in_month(month, year)
  end

  # The offset is added to a time built as if the fields were already UTC,
  # so its sign has to be inverted first.
  if zone.class == String
    if /^-/ =~ zone # Zone offset goes backwards
      zone[0] = '+'
    elsif /^\+/ =~ zone
      zone[0] = '-'
    end
  end

  Time.utc(year, month, day, hour, minute, second) + Time.zone_offset(zone || "UTC")
end
2798
+
2799
+ def _parse_date_rfc822(dateString)
2800
+ # Parse an RFC822, RFC1123, RFC2822 or asctime-style date
2801
+ # These first few lines are to fix up the stupid proprietary format from Disney
2802
+ unknown_timezones = { 'AT' => 'EDT', 'ET' => 'EST',
2803
+ 'CT' => 'CST', 'MT' => 'MST',
2804
+ 'PT' => 'PST'
2805
+ }
2806
+
2807
+ mon = dateString.split[2]
2808
+ if mon.length > 3 and Time::RFC2822_MONTH_NAME.include?mon[0..2]
2809
+ dateString.sub!(mon,mon[0..2])
2810
+ end
2811
+ if dateString[-3..-1] != "GMT" and unknown_timezones[dateString[-2..-1]]
2812
+ dateString[-2..-1] = unknown_timezones[dateString[-2..-1]]
2813
+ end
2814
+ # Okay, the Disney date format should be fixed up now.
2815
+ rfc = dateString.match(/([A-Za-z]{3}), ([0123]\d) ([A-Za-z]{3}) (\d{4})( (\d\d):(\d\d)(?::(\d\d))? ([A-Za-z]{3}))?/)
2816
+ if rfc.to_a.length > 1 and rfc.to_a.include? nil
2817
+ dow, day, mon, year, hour, min, sec, tz = rfc[1..-1]
2818
+ hour,min,sec = [hour,min,sec].map{|e| e.to_s.rjust(2,'0') }
2819
+ tz ||= "GMT"
2820
+ end
2821
+ asctime_match = dateString.match(/([A-Za-z]{3}) ([A-Za-z]{3}) (\d?\d) (\d\d):(\d\d):(\d\d) ([A-Za-z]{3}) (\d\d\d\d)/).to_a
2822
+ if asctime_match.to_a.length > 1
2823
+ # Month-abbr dayofmonth hour:minute:second year
2824
+ dow, mon, day, hour, min, sec, tz, year = asctime_match[1..-1]
2825
+ day.to_s.rjust(2,'0')
2826
+ end
2827
+ if (rfc.to_a.length > 1 and rfc.to_a.include? nil) or asctime_match.to_a.length > 1
2828
+ ds = "#{dow}, #{day} #{mon} #{year} #{hour}:#{min}:#{sec} #{tz}"
2829
+ else
2830
+ ds = dateString
2831
+ end
2832
+ t = Time.rfc2822(ds).utc
2833
+ return t
2834
+ end
2835
+
2836
# Parse a Perforce-style date ("yyyy/mm/dd hh:mm:ss TTT", preceded by a day of
# the week, e.g. "Fri, 2006/09/15 08:19:53 EDT") and return it as a UTC Time.
# FIXME not in 4.1?
def _parse_date_perforce(aDateString)
  parsed = Time.parse(aDateString)
  parsed.utc
end
2842
+
2843
# Convert a Time-like object into a nine-element array laid out as
# [year, month, mday, hour, min, sec, wday, yday, isdst].
#
# wday is shifted so that Monday is 0 and Sunday is 6; isdst is 1 or 0.
# NOTE leave the error handling to parse_date (a nil argument raises
# NoMethodError here).
def extract_tuple(atime)
  fields = [atime.year, atime.month, atime.mday, atime.hour,
            atime.min, atime.sec, (atime.wday - 1) % 7, atime.yday]
  # Coerce in place; mapping over a slice would only change a temporary copy.
  fields.map! { |value| value.to_i }
  fields << (atime.isdst ? 1 : 0)
  return fields
end
2854
+
2855
# Try every registered date handler in turn and return the first successful
# result as a tuple (see extract_tuple), or nil when no handler understands
# the string.
#
# A handler signals "not my format" either by returning nil or by raising a
# StandardError; both simply move on to the next handler. Signals such as
# Interrupt and SystemExit are deliberately not swallowed.
def parse_date(dateString)
  @date_handlers.each do |handler|
    begin
      $stderr << "Trying date_handler #{handler}\n" if $debug
      parsed = send(handler, dateString)
      next if parsed.nil?
      return extract_tuple(parsed)
    rescue StandardError => e
      $stderr << "#{handler} raised #{e}\n" if $debug
    end
  end
  return nil
end
2867
+
2868
+ end # End FeedParserMixin
2869
+
2870
# SAX (expat) handler that feeds well-formed XML into the shared
# FeedParserMixin machinery. Parse errors are recorded in #bozo / #exc.
class StrictFeedParser < XML::SAX::HandlerBase # expat
  include FeedParserMixin

  attr_accessor :bozo, :entries, :feeddata, :exc

  def initialize(baseuri, baselang, encoding)
    $stderr << "trying StrictFeedParser\n" if $debug
    startup(baseuri, baselang, encoding)
    @bozo = false
    @exc = nil
    super()
  end

  # Current [system id, line number] as reported by the document locator.
  def getPos
    [@locator.getSystemId, @locator.getLineNumber]
  end

  # Convert a SAX attribute list into an array of [name, value] pairs.
  def getAttrs(attrs)
    ret = []
    # Exclusive range: valid indexes run from 0 to getLength - 1.
    (0...attrs.getLength).each do |i|
      ret.push([attrs.getName(i), attrs.getValue(i)])
    end
    ret
  end

  def setDocumentLocator(loc)
    @locator = loc
  end

  def startDoctypeDecl(name, pub_sys, long_name, uri)
    #Nothing is done here. What could we do that is neat and useful?
  end

  def startNamespaceDecl(prefix, uri)
    trackNamespace(prefix, uri)
  end

  def endNamespaceDecl(prefix)
  end

  def startElement(name, attrs)
    # No need to raise UndeclaredNamespace, Expat does that for us with
    # "unbound prefix (XMLParserError)"
    unknown_starttag(qualified_tag_name(name), attrs)
  end

  def character(text, start, length)
    #handle_data(CGI.unescapeHTML(text))
    handle_data(text)
  end
  # expat provides "character" not "characters"!
  alias :characters :character # Just in case.

  def startCdata(content)
    handle_data(content)
  end

  def endElement(name)
    # Qualify the name exactly as startElement does so that start and end
    # tags are reported under the same name.
    unknown_endtag(qualified_tag_name(name))
  end

  def comment(comment)
    handle_comment(comment)
  end

  def entityDecl(*foo)
  end

  def unparsedEntityDecl(*foo)
  end

  # Record a recoverable parse error.
  def error(exc)
    @bozo = true
    @exc = exc
  end

  # Record a fatal parse error and re-raise it to abort parsing.
  def fatalError(exc)
    error(exc)
    raise exc
  end

  private

  # Turn an expat element name ("namespace-uri;localname" or plain
  # "localname") into the lower-cased "prefix:localname" form used by the
  # tag handlers. The prefix comes from @matchnamespaces.
  def qualified_tag_name(name)
    name =~ /^(([^;]*);)?(.+)$/ # Snag namespaceuri from name
    namespaceuri = ($2 || '').downcase
    localname = $3
    if /backend\.userland\.com\/rss/ =~ namespaceuri
      # match any backend.userland.com namespace
      namespaceuri = 'http://backend.userland.com/rss'
    end
    prefix = @matchnamespaces[namespaceuri]
    if prefix and not prefix.empty?
      localname = prefix + ':' + localname
    end
    localname.downcase
  end
end
2968
+
2969
# Forgiving SGML-based parser used when the strict XML parser cannot be used
# or fails.
class LooseFeedParser < BetterSGMLParser
  include FeedParserMixin
  # We write the methods that were in BaseHTMLProcessor in the python code
  # in here directly. We do this because if we inherited from
  # BaseHTMLProcessor but then included from FeedParserMixin, the methods
  # of Mixin would overwrite the methods we inherited from
  # BaseHTMLProcessor. This is exactly the opposite of what we want to
  # happen!

  attr_accessor :encoding, :bozo, :feeddata, :entries, :namespacesInUse

  Elements_No_End_Tag = ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
    'img', 'input', 'isindex', 'link', 'meta', 'param']
  New_Declname_Re = /[a-zA-Z][-_.a-zA-Z0-9:]*\s*/
  alias :sgml_feed :feed # feed needs to mapped to feeddata, not the SGMLParser method feed. I think.

  def feed
    @feeddata
  end

  def feed=(data)
    @feeddata = data
  end

  def initialize(baseuri, baselang, encoding)
    startup(baseuri, baselang, encoding)
    super() # Keep the parentheses! No touchy.
  end

  def reset
    @pieces = []
    super
  end

  # Pre-process +data+ and push it through the SGML parser.
  #
  # Stray "<!" sequences are escaped, self-closing tags of non-void elements
  # are expanded to an explicit start/end pair, and the numeric references
  # for the apostrophe (&#39;) and the double quote (&#34;) are replaced by
  # the literal characters. Note that +data+ is modified in place.
  def parse(data)
    data.gsub!(/<!((?!DOCTYPE|--|\[))/i, '&lt;!\1')
    data.gsub!(/<([^<\s]+?)\s*\/>/) do |tag|
      clean = tag[1..-3].strip
      if Elements_No_End_Tag.include?(clean)
        tag
      else
        '<' + clean + '></' + clean + '>'
      end
    end

    data.gsub!(/&#39;/, "'")
    data.gsub!(/&#34;/, '"') # &#34; is the double quote, not the apostrophe
    if @encoding and not @encoding.empty? # FIXME unicode check type(u'')
      data = uconvert(data, 'utf-8', @encoding)
    end
    sgml_feed(data) # see the alias above
  end

  # Normalise numeric references for the five XML special characters to
  # their named form; for non-XML content types they are then decoded to the
  # literal characters. +data+ is modified in place and returned.
  def decodeEntities(element, data)
    data.gsub!('&#60;', '&lt;')
    data.gsub!('&#x3c;', '&lt;')
    data.gsub!('&#62;', '&gt;')
    data.gsub!('&#x3e;', '&gt;')
    data.gsub!('&#38;', '&amp;')
    data.gsub!('&#x26;', '&amp;')
    data.gsub!('&#34;', '&quot;')
    data.gsub!('&#x22;', '&quot;')
    data.gsub!('&#39;', '&apos;')
    data.gsub!('&#x27;', '&apos;')
    if @contentparams.has_key? 'type' and not ((@contentparams['type'] || 'xml') =~ /xml$/u)
      data.gsub!('&lt;', '<')
      data.gsub!('&gt;', '>')
      data.gsub!('&amp;', '&')
      data.gsub!('&quot;', '"')
      data.gsub!('&apos;', "'")
    end
    return data
  end
end
3042
+
3043
# Rewrite every relative URI found in the URI-carrying attributes of
# +htmlSource+ so that it is absolute with respect to +baseURI+, and return
# the resulting HTML.
#
# Attribute values that cannot be parsed as a URI are left untouched rather
# than aborting the whole document. +encoding+ is accepted but not used here.
def FeedParser.resolveRelativeURIs(htmlSource, baseURI, encoding)
  $stderr << "entering resolveRelativeURIs\n" if $debug # FIXME write a decent logger
  relative_uris = [ ['a','href'],
    ['applet','codebase'],
    ['area','href'],
    ['blockquote','cite'],
    ['body','background'],
    ['del','cite'],
    ['form','action'],
    ['frame','longdesc'],
    ['frame','src'],
    ['iframe','longdesc'],
    ['iframe','src'],
    ['head','profile'],
    ['img','longdesc'],
    ['img','src'],
    ['img','usemap'],
    ['input','src'],
    ['input','usemap'],
    ['ins','cite'],
    ['link','href'],
    ['object','classid'],
    ['object','codebase'],
    ['object','data'],
    ['object','usemap'],
    ['q','cite'],
    ['script','src'],
  ]
  h = Hpricot(htmlSource)
  relative_uris.each do |ename, eattr|
    h.search(ename).each do |elem|
      euri = elem.attributes[eattr]
      next if euri.nil? or euri.empty?
      begin
        is_relative = URI.parse(euri).relative?
      rescue URI::InvalidURIError
        # Malformed value (e.g. contains spaces): keep it as it is.
        next
      end
      elem.attributes[eattr] = urljoin(baseURI, euri) if is_relative
    end
  end
  return h.to_html
end
3083
+
3084
# Hpricot document that can strip itself down to the acceptable elements.
class SanitizerDoc < Hpricot::Doc

  # Walk every node and sanitize it in place, then return self.
  #
  # * acceptable elements keep their tag but get their attributes stripped,
  # * other elements are replaced by their (recursively scrubbed) children;
  #   elements listed in Unacceptable_Elements_With_End_Tag lose their
  #   content first,
  # * doctype nodes are removed,
  # * text nodes get &#39; / &#34; decoded and carriage returns removed.
  def scrub
    traverse_all_element do |node|
      if node.elem?
        if Acceptable_Elements.include?(node.name)
          node.strip_attributes
        else
          node.inner_html = '' if Unacceptable_Elements_With_End_Tag.include?(node.name)
          # This works because the children swapped in are brought in "after" the current element.
          node.swap(SanitizerDoc.new(node.children).scrub.to_html)
        end
      elsif node.doctype?
        node.parent.children.delete(node)
      elsif node.text?
        text = node.to_s
        text.gsub!(/&#39;/, "'")
        text.gsub!(/&#34;/, '"')
        text.gsub!(/\r/, '')
        node.swap(text)
      end
    end

    # yes, that '/' should be there. It's a search method. See the Hpricot docs.
    # NOTE(review): neither `tag` nor @config is defined anywhere in this
    # class, so this branch looks like it would raise whenever $compatible is
    # false -- confirm against the rest of the file.
    if !$compatible # FIXME not properly recursive, see comment in recursive_strip
      (self/tag).strip_style(@config[:allow_css_properties], @config[:allow_css_keywords])
    end
    self
  end
end
3117
+
3118
# Convenience constructor: parse +html+ with Hpricot.make and wrap the
# resulting nodes in a FeedParser::SanitizerDoc.
def SanitizerDoc(html)
  parsed_nodes = Hpricot.make(html)
  FeedParser::SanitizerDoc.new(parsed_nodes)
end
module_function :SanitizerDoc
3122
# Return +html+ with everything but the acceptable elements removed (see
# SanitizerDoc#scrub), stripped of leading and trailing whitespace.
#
# "<!" sequences that do not start a DOCTYPE, a comment or a marked section
# are escaped first. The DOCTYPE test is case-insensitive, matching
# LooseFeedParser#parse, so that "<!doctype html>" is treated as a doctype
# instead of being escaped into visible text. +encoding+ is not used here.
def self.sanitizeHTML(html,encoding)
  # FIXME Tidy not yet supported
  html = html.gsub(/<!((?!DOCTYPE|--|\[))/i, '&lt;!\1')
  h = SanitizerDoc(html)
  h = h.scrub
  return h.to_html.strip
end
3129
+
3130
+
3131
+
3132
# Get the character encoding of the XML document.
#
# feed     - the opened feed; its #meta hash (when it has one) supplies the
#            HTTP headers. May be nil or an object without #meta.
# xml_data - the raw document.
#
# Returns [true_encoding, http_encoding, xml_encoding, sniffed_xml_encoding,
# acceptable_content_type], where
# * http_encoding is the charset from the Content-Type header (or nil),
# * xml_encoding is the lower-cased encoding from the XML declaration (or nil),
# * sniffed_xml_encoding is derived from the first bytes of the data (or nil),
# * true_encoding is chosen from those according to the content type,
# * acceptable_content_type tells whether the content type is an XML type.
def self.getCharacterEncoding(feed, xml_data)
  $stderr << "In getCharacterEncoding\n" if $debug
  sniffed_xml_encoding = nil
  xml_encoding = nil
  true_encoding = nil

  # Headers that lack a content-type must stay visible as "headers present
  # but no content-type" so that the iso-8859-1 default below can apply.
  http_headers = (feed.respond_to?(:meta) && feed.meta) || {}
  content_type_header = http_headers['content-type']
  if content_type_header
    http_content_type = content_type_header.split(';')[0]
    encoding_scan = content_type_header.to_s.scan(/charset\s*=\s*(.*?)(?:"|')*$/)
    http_encoding = encoding_scan.flatten[0].to_s.gsub(/("|')/,'')
    http_encoding = nil if http_encoding.empty?
    # FIXME Open-Uri returns iso8859-1 if there is no charset header,
    # but that doesn't pass the tests. Open-Uri claims its following
    # the right RFC. Are they wrong or do we need to change the tests?
  else
    http_content_type = nil
    http_encoding = nil
  end

  # Must sniff for non-ASCII-compatible character encodings before
  # searching for XML declaration. This heuristic is defined in
  # section F of the XML specification:
  # http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info
  begin
    if xml_data[0..3] == "\x4c\x6f\xa7\x94"
      # EBCDIC
      xml_data = _ebcdic_to_ascii(xml_data)
    elsif xml_data[0..3] == "\x00\x3c\x00\x3f"
      # UTF-16BE
      sniffed_xml_encoding = 'utf-16be'
      xml_data = uconvert(xml_data, 'utf-16be', 'utf-8')
    elsif xml_data.size >= 4 and xml_data[0..1] == "\xfe\xff" and xml_data[2..3] != "\x00\x00"
      # UTF-16BE with BOM
      sniffed_xml_encoding = 'utf-16be'
      xml_data = uconvert(xml_data[2..-1], 'utf-16be', 'utf-8')
    elsif xml_data[0..3] == "\x3c\x00\x3f\x00"
      # UTF-16LE
      sniffed_xml_encoding = 'utf-16le'
      xml_data = uconvert(xml_data, 'utf-16le', 'utf-8')
    elsif xml_data.size >=4 and xml_data[0..1] == "\xff\xfe" and xml_data[2..3] != "\x00\x00"
      # UTF-16LE with BOM
      sniffed_xml_encoding = 'utf-16le'
      xml_data = uconvert(xml_data[2..-1], 'utf-16le', 'utf-8')
    elsif xml_data[0..3] == "\x00\x00\x00\x3c"
      # UTF-32BE
      sniffed_xml_encoding = 'utf-32be'
      xml_data = uconvert(xml_data, 'utf-32be', 'utf-8')
    elsif xml_data[0..3] == "\x3c\x00\x00\x00"
      # UTF-32LE
      sniffed_xml_encoding = 'utf-32le'
      xml_data = uconvert(xml_data, 'utf-32le', 'utf-8')
    elsif xml_data[0..3] == "\x00\x00\xfe\xff"
      # UTF-32BE with BOM
      sniffed_xml_encoding = 'utf-32be'
      xml_data = uconvert(xml_data[4..-1], 'utf-32BE', 'utf-8')
    elsif xml_data[0..3] == "\xff\xfe\x00\x00"
      # UTF-32LE with BOM
      sniffed_xml_encoding = 'utf-32le'
      xml_data = uconvert(xml_data[4..-1], 'utf-32le', 'utf-8')
    elsif xml_data[0..2] == "\xef\xbb\xbf"
      # UTF-8 with BOM
      sniffed_xml_encoding = 'utf-8'
      xml_data = xml_data[3..-1]
    else
      # ASCII-compatible
    end
    xml_encoding_match = /^<\?.*encoding=[\'"](.*?)[\'"].*\?>/.match(xml_data)
  rescue
    # Conversion failed: carry on without a declared XML encoding.
    xml_encoding_match = nil
  end

  if xml_encoding_match
    xml_encoding = xml_encoding_match[1].downcase
    # These names do not say which byte order is in use; trust the sniffed one.
    xencodings = ['iso-10646-ucs-2', 'ucs-2', 'csunicode', 'iso-10646-ucs-4', 'ucs-4', 'csucs4', 'utf-16', 'utf-32', 'utf_16', 'utf_32', 'utf16', 'u16']
    if sniffed_xml_encoding and xencodings.include?(xml_encoding)
      xml_encoding = sniffed_xml_encoding
    end
  end

  acceptable_content_type = false
  application_content_types = ['application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity']
  text_content_types = ['text/xml', 'text/xml-external-parsed-entity']

  if application_content_types.include?(http_content_type) or (/^application\// =~ http_content_type and /\+xml$/ =~ http_content_type)
    acceptable_content_type = true
    true_encoding = http_encoding || xml_encoding || 'utf-8'
  elsif text_content_types.include?(http_content_type) or (/^text\// =~ http_content_type and /\+xml$/ =~ http_content_type)
    acceptable_content_type = true
    true_encoding = http_encoding || 'us-ascii'
  elsif /^text\// =~ http_content_type
    true_encoding = http_encoding || 'us-ascii'
  elsif http_headers and not http_headers.empty? and not http_headers.has_key?('content-type')
    true_encoding = xml_encoding || 'iso-8859-1'
  else
    true_encoding = xml_encoding || 'utf-8'
  end
  return true_encoding, http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type
end
3230
+
3231
# Changes an XML data stream on the fly to specify a new encoding.
#
# data     - a raw sequence of bytes (not Unicode) that is presumed to be in
#            +encoding+ already
# encoding - a string naming that encoding
#
# A leading byte order mark overrides +encoding+ and is stripped. The data is
# converted to UTF-8 with uconvert and its XML declaration is replaced by (or,
# when absent, the data is prefixed with) a UTF-8 declaration.
#
# Returns the converted document. Conversion errors raised by uconvert
# propagate to the caller, who uses them to try the next candidate encoding.
def self.toUTF8(data, encoding)
  $stderr << "entering self.toUTF8, trying encoding %s\n" % encoding if $debug
  # NOTE we must use double quotes when dealing with \x encodings!
  if (data.size >= 4 and data[0..1] == "\xfe\xff" and data[2..3] != "\x00\x00")
    if $debug
      $stderr << "stripping BOM\n"
      $stderr << "trying utf-16be instead\n" if encoding != 'utf-16be'
    end
    encoding = 'utf-16be'
    data = data[2..-1]
  elsif (data.size >= 4 and data[0..1] == "\xff\xfe" and data[2..3] != "\x00\x00")
    if $debug
      $stderr << "stripping BOM\n"
      $stderr << "trying utf-16le instead\n" if encoding != 'utf-16le'
    end
    encoding = 'utf-16le'
    data = data[2..-1]
  elsif (data[0..2] == "\xef\xbb\xbf")
    if $debug
      $stderr << "stripping BOM\n"
      $stderr << "trying utf-8 instead\n" if encoding != 'utf-8'
    end
    encoding = 'utf-8'
    data = data[3..-1]
  elsif (data[0..3] == "\x00\x00\xfe\xff")
    if $debug
      $stderr << "stripping BOM\n"
      $stderr << "trying utf-32be instead\n" if encoding != 'utf-32be'
    end
    encoding = 'utf-32be'
    data = data[4..-1]
  elsif (data[0..3] == "\xff\xfe\x00\x00")
    if $debug
      $stderr << "stripping BOM\n"
      $stderr << "trying utf-32le instead\n" if encoding != 'utf-32le'
    end
    encoding = 'utf-32le'
    data = data[4..-1]
  end

  # Let a failed conversion raise: swallowing it here would only lead to a
  # misleading success message and an unrelated error further down.
  newdata = uconvert(data, encoding, 'utf-8')
  $stderr << "successfully converted #{encoding} data to utf-8\n" if $debug

  declmatch = /^<\?xml[^>]*?>/
  newdecl = "<?xml version=\'1.0\' encoding=\'utf-8\'?>"
  if declmatch =~ newdata
    newdata = newdata.sub(declmatch, newdecl)
  else
    newdata = newdecl + "\n" + newdata
  end
  return newdata
end
3296
+
3297
# Strips DOCTYPE from XML document, returns [rss_version, stripped_data].
#
# rss_version is 'rss091n' when the first DOCTYPE mentions "netscape"
# (case-insensitively) and nil otherwise. stripped_data is the same XML
# document minus all <!ENTITY ...> declarations and the first DOCTYPE.
def self.stripDoctype(data)
  # m is for Regexp::MULTILINE
  without_entities = data.gsub(/<!ENTITY(.*?)>/m, '')

  doctype_pattern = /<!DOCTYPE(.*?)>/m
  found = doctype_pattern.match(without_entities)
  doctype = found ? found[1] : ''

  version = (/netscape/ =~ doctype.downcase) ? 'rss091n' : nil
  [version, without_entities.sub(doctype_pattern, '')]
end
3323
+
3324
# Convenience wrapper: forwards all arguments to FeedParser.parse.
def parse(*args)
  FeedParser.parse(*args)
end
3325
# Parse a feed from a URL, file, stream or string.
#
# furi    - location of the feed, handed to open
# options - Hash of optional settings:
#           :compatible, :etag, :modified, :agent, :referrer,
#           :content_location, :content_language, :content_type
#
# Returns a FeedParserDict with (at least) the keys 'feed', 'entries' and
# 'bozo'. Download, encoding and XML problems do not raise; they set
# result['bozo'] and are reported through result['bozo_exception'].
#
# NOTE(review): furi is passed to Kernel#open. Strings that URI.parse rejects
# are rescued below, but callers should still not pass untrusted input
# unchecked.
def FeedParser.parse(furi, options={})
  $compatible = options[:compatible] || $compatible # Use the default compatibility if compatible is nil
  result = FeedParserDict.new
  result['feed'] = FeedParserDict.new
  result['entries'] = []
  if options[:modified]
    options[:modified] = Time.parse(options[:modified]).rfc2822
    # FIXME this ignores all of our time parsing work. Does it matter?
  end
  result['bozo'] = false

  begin
    if URI::parse(furi).class == URI::Generic
      f = open(furi) # OpenURI doesn't behave well when passing HTTP options to a file.
    else
      # And when you do pass them, make sure they aren't just nil (this still true?)
      newd = {}
      newd["If-None-Match"] = options[:etag] unless options[:etag].nil?
      newd["If-Modified-Since"] = options[:modified] unless options[:modified].nil?
      newd["User-Agent"] = (options[:agent] || USER_AGENT).to_s
      newd["Referer"] = options[:referrer] unless options[:referrer].nil?
      newd["Content-Location"] = options[:content_location] unless options[:content_location].nil?
      newd["Content-Language"] = options[:content_language] unless options[:content_language].nil?
      newd["Content-type"] = options[:content_type] unless options[:content_type].nil?

      f = open(furi, newd)
    end

    data = f.read
    f.close
  rescue => e
    $stderr << "Rescued in parse: "+e.to_s+"\n" if $debug # My addition
    result['bozo'] = true
    result['bozo_exception'] = e
    data = ''
    # Do not leak the handle when reading (rather than opening) failed.
    f.close if f and not f.closed?
    f = nil
  end

  begin
    if f.meta
      result['etag'] = options[:etag] || f.meta['etag']
      result['modified'] = options[:modified] || f.last_modified
      result['url'] = f.base_uri.to_s
      result['status'] = f.status[0] || 200
      result['headers'] = f.meta
      result['headers']['content-location'] ||= options[:content_location] unless options[:content_location].nil?
      result['headers']['content-language'] ||= options[:content_language] unless options[:content_language].nil?
      result['headers']['content-type'] ||= options[:content_type] unless options[:content_type].nil?
    end
  rescue NoMethodError
    # f is nil (download failed) or has no HTTP metadata (e.g. a local file):
    # build the headers from the options alone.
    result['headers'] = {}
    result['etag'] = result['headers']['etag'] = options[:etag] unless options[:etag].nil?
    result['modified'] = result['headers']['last-modified'] = options[:modified] unless options[:modified].nil?
    unless options[:content_location].nil?
      result['headers']['content-location'] = options[:content_location]
    end
    unless options[:content_language].nil?
      result['headers']['content-language'] = options[:content_language]
    end
    unless options[:content_type].nil?
      result['headers']['content-type'] = options[:content_type]
    end
  end

  # there are four encodings to keep track of:
  # - http_encoding is the encoding declared in the Content-Type HTTP header
  # - xml_encoding is the encoding declared in the <?xml declaration
  # - sniffed_encoding is the encoding sniffed from the first 4 bytes of the XML data
  # - result['encoding'] is the actual encoding, as per RFC 3023 and a variety of other conflicting specifications
  http_headers = result['headers']
  result['encoding'], http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type =
    self.getCharacterEncoding(f,data)

  if not http_headers.empty? and not acceptable_content_type
    if http_headers.has_key?('content-type')
      bozo_message = "#{http_headers['content-type']} is not an XML media type"
    else
      bozo_message = 'no Content-type specified'
    end
    result['bozo'] = true
    result['bozo_exception'] = NonXMLContentType.new(bozo_message) # I get to care about this, cuz Mark says I should.
  end
  result['version'], data = self.stripDoctype(data)
  baseuri = http_headers['content-location'] || result['href']
  baselang = http_headers['content-language']

  # if server sent 304, we're done
  if result['status'] == 304
    result['version'] = ''
    result['debug_message'] = "The feed has not changed since you last checked, " +
      "so the server sent no data. This is a feature, not a bug!"
    return result
  end

  # if there was a problem downloading, we're done
  if data.nil? or data.empty?
    return result
  end

  # determine character encoding
  use_strict_parser = false
  known_encoding = false
  tried_encodings = []
  proposed_encoding = nil
  # try: HTTP encoding, declared XML encoding, encoding sniffed from BOM
  # (the block parameter has its own name so that the outer
  # proposed_encoding is set on every Ruby version, and only on success)
  [result['encoding'], xml_encoding, sniffed_xml_encoding].each do |candidate|
    next if candidate.nil? or candidate.empty?
    next if tried_encodings.include? candidate
    tried_encodings << candidate
    begin
      data = self.toUTF8(data, candidate)
      proposed_encoding = candidate
      known_encoding = use_strict_parser = true
      break
    rescue
      # this candidate did not work; try the next one
    end
  end

  # if no luck and we have auto-detection library, try that
  if not known_encoding and $chardet
    begin
      proposed_encoding = CharDet.detect(data)['encoding']
      if proposed_encoding and not tried_encodings.include?(proposed_encoding)
        tried_encodings << proposed_encoding
        data = self.toUTF8(data, proposed_encoding)
        known_encoding = use_strict_parser = true
      end
    rescue
    end
  end

  # if still no luck and we haven't tried utf-8 yet, try that
  if not known_encoding and not tried_encodings.include?('utf-8')
    begin
      proposed_encoding = 'utf-8'
      tried_encodings << proposed_encoding
      data = self.toUTF8(data, proposed_encoding)
      known_encoding = use_strict_parser = true
    rescue
    end
  end

  # if still no luck and we haven't tried windows-1252 yet, try that
  if not known_encoding and not tried_encodings.include?('windows-1252')
    begin
      proposed_encoding = 'windows-1252'
      tried_encodings << proposed_encoding
      data = self.toUTF8(data, proposed_encoding)
      known_encoding = use_strict_parser = true
    rescue
    end
  end

  # NOTE this isn't in FeedParser.py 4.1
  # if still no luck and we haven't tried iso-8859-2 yet, try that.
  #if not known_encoding and not tried_encodings.include?'iso-8859-2'
  #  begin
  #    proposed_encoding = 'iso-8859-2'
  #    tried_encodings << proposed_encoding
  #    data = self.toUTF8(data, proposed_encoding)
  #    known_encoding = use_strict_parser = true
  #  rescue
  #  end
  #end

  # if still no luck, give up
  if not known_encoding
    result['bozo'] = true
    result['bozo_exception'] = CharacterEncodingUnknown.new("document encoding unknown, I tried #{result['encoding']}, #{xml_encoding}, utf-8 and windows-1252 but nothing worked")
    result['encoding'] = ''
  elsif proposed_encoding != result['encoding']
    result['bozo'] = true
    result['bozo_exception'] = CharacterEncodingOverride.new("documented declared as #{result['encoding']}, but parsed as #{proposed_encoding}")
    result['encoding'] = proposed_encoding
  end

  if use_strict_parser
    # initialize the SAX parser
    saxparser = XML::SAX::Helpers::ParserFactory.makeParser("XML::Parser::SAXDriver")
    feedparser = StrictFeedParser.new(baseuri, baselang, 'utf-8')
    saxparser.setDocumentHandler(feedparser)
    saxparser.setDTDHandler(feedparser)
    saxparser.setEntityResolver(feedparser)
    saxparser.setErrorHandler(feedparser)

    inputdata = XML::SAX::InputSource.new('parsedfeed')
    inputdata.setByteStream(StringIO.new(data))
    begin
      saxparser.parse(inputdata)
    rescue Exception => parseerr # resparse
      # NOTE(review): Exception is kept because the class hierarchy of the
      # SAX driver's errors is not visible here -- narrow if possible.
      if $debug
        $stderr << "xml parsing failed\n"
        $stderr << parseerr.to_s+"\n" # Hrmph.
      end
      result['bozo'] = true
      # Report the error recorded by the handler, or else the one just raised.
      result['bozo_exception'] = feedparser.exc || parseerr
      use_strict_parser = false
    end
  end
  if not use_strict_parser
    feedparser = LooseFeedParser.new(baseuri, baselang, (known_encoding ? 'utf-8' : ''))
    feedparser.parse(data)
    $stderr << "Using LooseFeed\n\n" if $debug
  end
  result['feed'] = feedparser.feeddata
  result['entries'] = feedparser.entries
  result['version'] = result['version'] || feedparser.version
  result['namespaces'] = feedparser.namespacesInUse
  return result
end
3541
+ end # End FeedParser module
3542
+
3543
# Base class for the result writers: it only remembers the parse results
# that a subclass is going to write out.
class Serializer
  def initialize(parse_results)
    @results = parse_results
  end
end
3548
+
3549
# Writes the parse results as flat "path.to.key=value" lines.
class TextSerializer < Serializer
  # Write all results to +stream+ (anything that responds to <<).
  def write(stream=$stdout)
    writer(stream, @results, '')
  end

  # Recursively write +node+ under the dotted path +prefix+.
  #
  # Hash-like nodes are walked in sorted key order, skipping 'description'
  # and 'link' as well as any key that has a "<key>_detail" or "<key>_parsed"
  # sibling. Arrays are written with an "[index]" suffix. Everything else is
  # written as "prefix=value". nil and empty nodes are skipped.
  def writer(stream, node, prefix)
    return if node.nil? or (node.respond_to?(:empty?) and node.empty?)
    if node.respond_to?(:keys)
      node.keys.sort.each do |key|
        next if ['description','link'].include? key
        next if node.has_key? key+'_detail'
        next if node.has_key? key+'_parsed'
        writer(stream, node[key], prefix+key+'.')
      end
    elsif node.class == Array
      node.each_with_index do |thing, index|
        writer(stream, thing, prefix[0..-2] + '[' + index.to_s + '].')
      end
    else
      begin
        s = u(node.to_s)
        stream << prefix[0..-2]
        stream << '='
        stream << s
        stream << "\n"
      rescue
        # Best effort: a value that cannot be converted is left out.
      end
    end
  end
end
3579
+
3580
require 'pp'

# Serializer that pretty-prints the whole results object, preceded by the
# value stored under its 'href' key.
class PprintSerializer < Serializer
  # Writes the 'href' header, a blank line, the pretty-printed results and
  # a trailing newline to +stream+ (anything responding to <<).
  #
  # The pretty-printed dump is sent to +stream+ as well; Kernel#pp would
  # always print to $stdout regardless of the stream passed in.
  def write(stream = $stdout)
    stream << @results['href'].to_s + "\n\n"
    PP.pp(@results, stream)
    stream << "\n"
  end
end
3587
+
3588
+
3589
+ require 'optparse'
3590
+ require 'ostruct'
3591
# Command-line driver: parses every URL left on the command line after
# option processing and prints each result with the selected serializer.
#
# NOTE(review): these statements are not wrapped in an `if __FILE__ == $0`
# guard, so they also run (and consume ARGV) whenever this file is loaded
# with `require` -- confirm whether that is intended.
options = OpenStruct.new
options.etag = options.modified = options.agent = options.referrer = nil
options.content_language = options.content_location = options.ctype = nil
# Symbol, to match the values OptionParser produces for --format.
options.format = :pprint
options.compatible = $compatible
options.verbose = false

opts = OptionParser.new do |parser|
  parser.separator ""
  parser.on("-A", "--user-agent [AGENT]",
            "User-Agent for HTTP URLs") do |agent|
    options.agent = agent
  end

  parser.on("-e", "--referrer [URL]",
            "Referrer for HTTP URLs") do |referrer|
    options.referrer = referrer
  end

  # No short switch here: "-t" used to be declared on both --etag and
  # --content-type. OptionParser keeps a single handler per short switch
  # (the later declaration wins), so "-t" has only ever selected
  # --content-type; that meaning is preserved below.
  parser.on("--etag [TAG]",
            "ETag/If-None-Match for HTTP URLs") do |etag|
    options.etag = etag
  end

  parser.on("-m", "--last-modified [DATE]",
            "Last-modified/If-Modified-Since for HTTP URLs (any supported date format)") do |modified|
    options.modified = modified
  end

  parser.on("-f", "--format [FORMAT]", [:text, :pprint],
            "output results in FORMAT (text, pprint)") do |format|
    options.format = format
  end

  parser.on("-v", "--[no-]verbose",
            "write debugging information to stderr") do |verbose|
    options.verbose = verbose
  end

  parser.on("-c", "--[no-]compatible",
            "strip element attributes like feedparser.py 4.1 (default)") do |compatible|
    options.compatible = compatible
  end

  parser.on("-l", "--content-location [LOCATION]",
            "default Content-Location HTTP header") do |location|
    options.content_location = location
  end

  parser.on("-a", "--content-language [LANG]",
            "default Content-Language HTTP header") do |language|
    options.content_language = language
  end

  parser.on("-t", "--content-type [TYPE]",
            "default Content-type HTTP header") do |ctype|
    options.ctype = ctype
  end
end

# parse! strips the recognised options out of ARGV, leaving only the URLs.
opts.parse!(ARGV)
$debug = true if options.verbose
$compatible = options.compatible unless options.compatible.nil?

serializer = options.format == :text ? TextSerializer : PprintSerializer

# Iterate over a snapshot so the loop is unaffected by anything that touches
# ARGV while a feed is being parsed.
ARGV.dup.each do |url|
  results = FeedParser.parse(url,
                             :etag => options.etag,
                             :modified => options.modified,
                             :agent => options.agent,
                             :referrer => options.referrer,
                             :content_location => options.content_location,
                             :content_language => options.content_language,
                             :content_type => options.ctype)
  serializer.new(results).write($stdout)
end