rfeedparser 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3332)
  1. data/LICENSE +68 -0
  2. data/README +28 -0
  3. data/RUBY-TESTING +60 -0
  4. data/lib/feedparser.rb +3671 -0
  5. data/tests/feedparserserver.rb +115 -0
  6. data/tests/feedparsertest.rb +196 -0
  7. data/tests/illformed/amp/amp01.xml +9 -0
  8. data/tests/illformed/amp/amp02.xml +9 -0
  9. data/tests/illformed/amp/amp03.xml +9 -0
  10. data/tests/illformed/amp/amp04.xml +9 -0
  11. data/tests/illformed/amp/amp05.xml +9 -0
  12. data/tests/illformed/amp/amp06.xml +9 -0
  13. data/tests/illformed/amp/amp07.xml +9 -0
  14. data/tests/illformed/amp/amp08.xml +9 -0
  15. data/tests/illformed/amp/amp09.xml +9 -0
  16. data/tests/illformed/amp/amp10.xml +9 -0
  17. data/tests/illformed/amp/amp11.xml +9 -0
  18. data/tests/illformed/amp/amp12.xml +9 -0
  19. data/tests/illformed/amp/amp13.xml +9 -0
  20. data/tests/illformed/amp/amp14.xml +9 -0
  21. data/tests/illformed/amp/amp15.xml +9 -0
  22. data/tests/illformed/amp/amp16.xml +9 -0
  23. data/tests/illformed/amp/amp17.xml +9 -0
  24. data/tests/illformed/amp/amp18.xml +9 -0
  25. data/tests/illformed/amp/amp19.xml +9 -0
  26. data/tests/illformed/amp/amp20.xml +9 -0
  27. data/tests/illformed/amp/amp21.xml +9 -0
  28. data/tests/illformed/amp/amp22.xml +9 -0
  29. data/tests/illformed/amp/amp23.xml +9 -0
  30. data/tests/illformed/amp/amp24.xml +9 -0
  31. data/tests/illformed/amp/amp25.xml +9 -0
  32. data/tests/illformed/amp/amp26.xml +9 -0
  33. data/tests/illformed/amp/amp27.xml +9 -0
  34. data/tests/illformed/amp/amp28.xml +9 -0
  35. data/tests/illformed/amp/amp29.xml +9 -0
  36. data/tests/illformed/amp/amp30.xml +9 -0
  37. data/tests/illformed/amp/amp31.xml +9 -0
  38. data/tests/illformed/amp/amp32.xml +9 -0
  39. data/tests/illformed/amp/amp33.xml +9 -0
  40. data/tests/illformed/amp/amp34.xml +9 -0
  41. data/tests/illformed/amp/amp35.xml +9 -0
  42. data/tests/illformed/amp/amp36.xml +9 -0
  43. data/tests/illformed/amp/amp37.xml +9 -0
  44. data/tests/illformed/amp/amp38.xml +9 -0
  45. data/tests/illformed/amp/amp39.xml +9 -0
  46. data/tests/illformed/amp/amp40.xml +9 -0
  47. data/tests/illformed/amp/amp41.xml +9 -0
  48. data/tests/illformed/amp/amp42.xml +9 -0
  49. data/tests/illformed/amp/amp43.xml +9 -0
  50. data/tests/illformed/amp/amp44.xml +9 -0
  51. data/tests/illformed/amp/amp45.xml +9 -0
  52. data/tests/illformed/amp/amp46.xml +9 -0
  53. data/tests/illformed/amp/amp47.xml +9 -0
  54. data/tests/illformed/amp/amp48.xml +9 -0
  55. data/tests/illformed/amp/amp49.xml +9 -0
  56. data/tests/illformed/amp/amp50.xml +9 -0
  57. data/tests/illformed/amp/amp51.xml +9 -0
  58. data/tests/illformed/amp/amp52.xml +9 -0
  59. data/tests/illformed/amp/amp53.xml +9 -0
  60. data/tests/illformed/amp/amp54.xml +9 -0
  61. data/tests/illformed/amp/amp55.xml +9 -0
  62. data/tests/illformed/amp/amp56.xml +9 -0
  63. data/tests/illformed/amp/amp57.xml +9 -0
  64. data/tests/illformed/amp/amp58.xml +9 -0
  65. data/tests/illformed/amp/amp59.xml +9 -0
  66. data/tests/illformed/amp/amp60.xml +9 -0
  67. data/tests/illformed/amp/amp61.xml +9 -0
  68. data/tests/illformed/amp/amp62.xml +9 -0
  69. data/tests/illformed/amp/amp63.xml +9 -0
  70. data/tests/illformed/amp/amp64.xml +9 -0
  71. data/tests/illformed/atom/atom_namespace_1.xml +7 -0
  72. data/tests/illformed/atom/atom_namespace_2.xml +7 -0
  73. data/tests/illformed/atom/atom_namespace_3.xml +7 -0
  74. data/tests/illformed/atom/atom_namespace_4.xml +7 -0
  75. data/tests/illformed/atom/atom_namespace_5.xml +7 -0
  76. data/tests/illformed/atom/entry_author_email.xml +13 -0
  77. data/tests/illformed/atom/entry_author_homepage.xml +13 -0
  78. data/tests/illformed/atom/entry_author_map_author.xml +13 -0
  79. data/tests/illformed/atom/entry_author_map_author_2.xml +12 -0
  80. data/tests/illformed/atom/entry_author_name.xml +13 -0
  81. data/tests/illformed/atom/entry_author_uri.xml +13 -0
  82. data/tests/illformed/atom/entry_author_url.xml +13 -0
  83. data/tests/illformed/atom/entry_content_mode_base64.xml +11 -0
  84. data/tests/illformed/atom/entry_content_mode_escaped.xml +9 -0
  85. data/tests/illformed/atom/entry_content_type.xml +9 -0
  86. data/tests/illformed/atom/entry_content_type_text_plain.xml +9 -0
  87. data/tests/illformed/atom/entry_content_value.xml +9 -0
  88. data/tests/illformed/atom/entry_contributor_email.xml +13 -0
  89. data/tests/illformed/atom/entry_contributor_homepage.xml +13 -0
  90. data/tests/illformed/atom/entry_contributor_multiple.xml +18 -0
  91. data/tests/illformed/atom/entry_contributor_name.xml +13 -0
  92. data/tests/illformed/atom/entry_contributor_uri.xml +13 -0
  93. data/tests/illformed/atom/entry_contributor_url.xml +13 -0
  94. data/tests/illformed/atom/entry_id.xml +9 -0
  95. data/tests/illformed/atom/entry_id_map_guid.xml +9 -0
  96. data/tests/illformed/atom/entry_link_alternate_map_link.xml +9 -0
  97. data/tests/illformed/atom/entry_link_alternate_map_link_2.xml +9 -0
  98. data/tests/illformed/atom/entry_link_href.xml +9 -0
  99. data/tests/illformed/atom/entry_link_multiple.xml +10 -0
  100. data/tests/illformed/atom/entry_link_rel.xml +9 -0
  101. data/tests/illformed/atom/entry_link_title.xml +9 -0
  102. data/tests/illformed/atom/entry_link_type.xml +9 -0
  103. data/tests/illformed/atom/entry_summary.xml +9 -0
  104. data/tests/illformed/atom/entry_summary_base64.xml +11 -0
  105. data/tests/illformed/atom/entry_summary_base64_2.xml +11 -0
  106. data/tests/illformed/atom/entry_summary_content_mode_base64.xml +11 -0
  107. data/tests/illformed/atom/entry_summary_content_mode_escaped.xml +9 -0
  108. data/tests/illformed/atom/entry_summary_content_type.xml +9 -0
  109. data/tests/illformed/atom/entry_summary_content_type_text_plain.xml +9 -0
  110. data/tests/illformed/atom/entry_summary_content_value.xml +9 -0
  111. data/tests/illformed/atom/entry_summary_escaped_markup.xml +9 -0
  112. data/tests/illformed/atom/entry_summary_inline_markup.xml +9 -0
  113. data/tests/illformed/atom/entry_summary_inline_markup_2.xml +9 -0
  114. data/tests/illformed/atom/entry_summary_naked_markup.xml +9 -0
  115. data/tests/illformed/atom/entry_summary_text_plain.xml +9 -0
  116. data/tests/illformed/atom/entry_title.xml +9 -0
  117. data/tests/illformed/atom/entry_title_base64.xml +11 -0
  118. data/tests/illformed/atom/entry_title_base64_2.xml +11 -0
  119. data/tests/illformed/atom/entry_title_content_mode_base64.xml +11 -0
  120. data/tests/illformed/atom/entry_title_content_mode_escaped.xml +9 -0
  121. data/tests/illformed/atom/entry_title_content_type.xml +9 -0
  122. data/tests/illformed/atom/entry_title_content_type_text_plain.xml +9 -0
  123. data/tests/illformed/atom/entry_title_content_value.xml +9 -0
  124. data/tests/illformed/atom/entry_title_escaped_markup.xml +9 -0
  125. data/tests/illformed/atom/entry_title_inline_markup.xml +9 -0
  126. data/tests/illformed/atom/entry_title_inline_markup_2.xml +9 -0
  127. data/tests/illformed/atom/entry_title_naked_markup.xml +9 -0
  128. data/tests/illformed/atom/entry_title_text_plain.xml +9 -0
  129. data/tests/illformed/atom/entry_title_text_plain_brackets.xml +9 -0
  130. data/tests/illformed/atom/feed_author_email.xml +11 -0
  131. data/tests/illformed/atom/feed_author_homepage.xml +11 -0
  132. data/tests/illformed/atom/feed_author_map_author.xml +11 -0
  133. data/tests/illformed/atom/feed_author_map_author_2.xml +10 -0
  134. data/tests/illformed/atom/feed_author_name.xml +11 -0
  135. data/tests/illformed/atom/feed_author_uri.xml +11 -0
  136. data/tests/illformed/atom/feed_author_url.xml +11 -0
  137. data/tests/illformed/atom/feed_contributor_email.xml +11 -0
  138. data/tests/illformed/atom/feed_contributor_homepage.xml +11 -0
  139. data/tests/illformed/atom/feed_contributor_multiple.xml +16 -0
  140. data/tests/illformed/atom/feed_contributor_name.xml +11 -0
  141. data/tests/illformed/atom/feed_contributor_uri.xml +11 -0
  142. data/tests/illformed/atom/feed_contributor_url.xml +11 -0
  143. data/tests/illformed/atom/feed_copyright.xml +7 -0
  144. data/tests/illformed/atom/feed_copyright_base64.xml +9 -0
  145. data/tests/illformed/atom/feed_copyright_base64_2.xml +9 -0
  146. data/tests/illformed/atom/feed_copyright_content_mode_base64.xml +9 -0
  147. data/tests/illformed/atom/feed_copyright_content_mode_escaped.xml +7 -0
  148. data/tests/illformed/atom/feed_copyright_content_type.xml +7 -0
  149. data/tests/illformed/atom/feed_copyright_content_type_text_plain.xml +7 -0
  150. data/tests/illformed/atom/feed_copyright_content_value.xml +7 -0
  151. data/tests/illformed/atom/feed_copyright_escaped_markup.xml +7 -0
  152. data/tests/illformed/atom/feed_copyright_inline_markup.xml +7 -0
  153. data/tests/illformed/atom/feed_copyright_inline_markup_2.xml +7 -0
  154. data/tests/illformed/atom/feed_copyright_naked_markup.xml +7 -0
  155. data/tests/illformed/atom/feed_copyright_text_plain.xml +7 -0
  156. data/tests/illformed/atom/feed_generator.xml +7 -0
  157. data/tests/illformed/atom/feed_generator_name.xml +7 -0
  158. data/tests/illformed/atom/feed_generator_url.xml +7 -0
  159. data/tests/illformed/atom/feed_generator_version.xml +7 -0
  160. data/tests/illformed/atom/feed_id.xml +7 -0
  161. data/tests/illformed/atom/feed_id_map_guid.xml +7 -0
  162. data/tests/illformed/atom/feed_info.xml +7 -0
  163. data/tests/illformed/atom/feed_info_base64.xml +9 -0
  164. data/tests/illformed/atom/feed_info_base64_2.xml +9 -0
  165. data/tests/illformed/atom/feed_info_content_mode_base64.xml +9 -0
  166. data/tests/illformed/atom/feed_info_content_mode_escaped.xml +7 -0
  167. data/tests/illformed/atom/feed_info_content_type.xml +7 -0
  168. data/tests/illformed/atom/feed_info_content_type_text_plain.xml +7 -0
  169. data/tests/illformed/atom/feed_info_content_value.xml +7 -0
  170. data/tests/illformed/atom/feed_info_escaped_markup.xml +7 -0
  171. data/tests/illformed/atom/feed_info_inline_markup.xml +7 -0
  172. data/tests/illformed/atom/feed_info_inline_markup_2.xml +7 -0
  173. data/tests/illformed/atom/feed_info_naked_markup.xml +7 -0
  174. data/tests/illformed/atom/feed_info_text_plain.xml +7 -0
  175. data/tests/illformed/atom/feed_link_alternate_map_link.xml +7 -0
  176. data/tests/illformed/atom/feed_link_alternate_map_link_2.xml +7 -0
  177. data/tests/illformed/atom/feed_link_href.xml +7 -0
  178. data/tests/illformed/atom/feed_link_multiple.xml +8 -0
  179. data/tests/illformed/atom/feed_link_rel.xml +7 -0
  180. data/tests/illformed/atom/feed_link_title.xml +7 -0
  181. data/tests/illformed/atom/feed_link_type.xml +7 -0
  182. data/tests/illformed/atom/feed_tagline.xml +7 -0
  183. data/tests/illformed/atom/feed_tagline_base64.xml +9 -0
  184. data/tests/illformed/atom/feed_tagline_base64_2.xml +9 -0
  185. data/tests/illformed/atom/feed_tagline_content_mode_base64.xml +9 -0
  186. data/tests/illformed/atom/feed_tagline_content_mode_escaped.xml +7 -0
  187. data/tests/illformed/atom/feed_tagline_content_type.xml +7 -0
  188. data/tests/illformed/atom/feed_tagline_content_type_text_plain.xml +7 -0
  189. data/tests/illformed/atom/feed_tagline_content_value.xml +7 -0
  190. data/tests/illformed/atom/feed_tagline_escaped_markup.xml +7 -0
  191. data/tests/illformed/atom/feed_tagline_inline_markup.xml +7 -0
  192. data/tests/illformed/atom/feed_tagline_inline_markup_2.xml +7 -0
  193. data/tests/illformed/atom/feed_tagline_naked_markup.xml +7 -0
  194. data/tests/illformed/atom/feed_tagline_text_plain.xml +7 -0
  195. data/tests/illformed/atom/feed_title.xml +7 -0
  196. data/tests/illformed/atom/feed_title_base64.xml +9 -0
  197. data/tests/illformed/atom/feed_title_base64_2.xml +9 -0
  198. data/tests/illformed/atom/feed_title_content_mode_base64.xml +9 -0
  199. data/tests/illformed/atom/feed_title_content_mode_escaped.xml +7 -0
  200. data/tests/illformed/atom/feed_title_content_type.xml +7 -0
  201. data/tests/illformed/atom/feed_title_content_type_text_plain.xml +7 -0
  202. data/tests/illformed/atom/feed_title_content_value.xml +7 -0
  203. data/tests/illformed/atom/feed_title_escaped_markup.xml +7 -0
  204. data/tests/illformed/atom/feed_title_inline_markup.xml +7 -0
  205. data/tests/illformed/atom/feed_title_inline_markup_2.xml +7 -0
  206. data/tests/illformed/atom/feed_title_naked_markup.xml +7 -0
  207. data/tests/illformed/atom/feed_title_text_plain.xml +7 -0
  208. data/tests/illformed/atom/relative_uri.xml +7 -0
  209. data/tests/illformed/atom/relative_uri_inherit.xml +7 -0
  210. data/tests/illformed/atom/relative_uri_inherit_2.xml +7 -0
  211. data/tests/illformed/atom10/atom10_namespace.xml +7 -0
  212. data/tests/illformed/atom10/atom10_version.xml +6 -0
  213. data/tests/illformed/atom10/entry_author_email.xml +13 -0
  214. data/tests/illformed/atom10/entry_author_map_author.xml +13 -0
  215. data/tests/illformed/atom10/entry_author_map_author_2.xml +12 -0
  216. data/tests/illformed/atom10/entry_author_name.xml +13 -0
  217. data/tests/illformed/atom10/entry_author_uri.xml +13 -0
  218. data/tests/illformed/atom10/entry_author_url.xml +13 -0
  219. data/tests/illformed/atom10/entry_category_label.xml +9 -0
  220. data/tests/illformed/atom10/entry_category_scheme.xml +9 -0
  221. data/tests/illformed/atom10/entry_category_term.xml +9 -0
  222. data/tests/illformed/atom10/entry_content_application_xml.xml +9 -0
  223. data/tests/illformed/atom10/entry_content_base64.xml +11 -0
  224. data/tests/illformed/atom10/entry_content_base64_2.xml +11 -0
  225. data/tests/illformed/atom10/entry_content_escaped_markup.xml +9 -0
  226. data/tests/illformed/atom10/entry_content_inline_markup.xml +9 -0
  227. data/tests/illformed/atom10/entry_content_inline_markup_2.xml +9 -0
  228. data/tests/illformed/atom10/entry_content_src.xml +9 -0
  229. data/tests/illformed/atom10/entry_content_text_plain.xml +9 -0
  230. data/tests/illformed/atom10/entry_content_text_plain_brackets.xml +9 -0
  231. data/tests/illformed/atom10/entry_content_type.xml +9 -0
  232. data/tests/illformed/atom10/entry_content_type_text.xml +9 -0
  233. data/tests/illformed/atom10/entry_content_value.xml +9 -0
  234. data/tests/illformed/atom10/entry_contributor_email.xml +13 -0
  235. data/tests/illformed/atom10/entry_contributor_multiple.xml +18 -0
  236. data/tests/illformed/atom10/entry_contributor_name.xml +13 -0
  237. data/tests/illformed/atom10/entry_contributor_uri.xml +13 -0
  238. data/tests/illformed/atom10/entry_contributor_url.xml +13 -0
  239. data/tests/illformed/atom10/entry_id.xml +9 -0
  240. data/tests/illformed/atom10/entry_id_map_guid.xml +9 -0
  241. data/tests/illformed/atom10/entry_id_no_normalization_1.xml +9 -0
  242. data/tests/illformed/atom10/entry_id_no_normalization_2.xml +9 -0
  243. data/tests/illformed/atom10/entry_id_no_normalization_3.xml +9 -0
  244. data/tests/illformed/atom10/entry_id_no_normalization_4.xml +9 -0
  245. data/tests/illformed/atom10/entry_id_no_normalization_5.xml +9 -0
  246. data/tests/illformed/atom10/entry_id_no_normalization_6.xml +9 -0
  247. data/tests/illformed/atom10/entry_id_no_normalization_7.xml +9 -0
  248. data/tests/illformed/atom10/entry_link_alternate_map_link.xml +9 -0
  249. data/tests/illformed/atom10/entry_link_alternate_map_link_2.xml +9 -0
  250. data/tests/illformed/atom10/entry_link_alternate_map_link_3.xml +11 -0
  251. data/tests/illformed/atom10/entry_link_href.xml +9 -0
  252. data/tests/illformed/atom10/entry_link_hreflang.xml +9 -0
  253. data/tests/illformed/atom10/entry_link_length.xml +9 -0
  254. data/tests/illformed/atom10/entry_link_multiple.xml +10 -0
  255. data/tests/illformed/atom10/entry_link_no_rel.xml +9 -0
  256. data/tests/illformed/atom10/entry_link_rel.xml +9 -0
  257. data/tests/illformed/atom10/entry_link_rel_enclosure.xml +9 -0
  258. data/tests/illformed/atom10/entry_link_rel_enclosure_map_enclosure_length.xml +9 -0
  259. data/tests/illformed/atom10/entry_link_rel_enclosure_map_enclosure_type.xml +9 -0
  260. data/tests/illformed/atom10/entry_link_rel_enclosure_map_enclosure_url.xml +9 -0
  261. data/tests/illformed/atom10/entry_link_rel_other.xml +9 -0
  262. data/tests/illformed/atom10/entry_link_rel_related.xml +9 -0
  263. data/tests/illformed/atom10/entry_link_rel_self.xml +9 -0
  264. data/tests/illformed/atom10/entry_link_rel_via.xml +9 -0
  265. data/tests/illformed/atom10/entry_link_title.xml +9 -0
  266. data/tests/illformed/atom10/entry_link_type.xml +9 -0
  267. data/tests/illformed/atom10/entry_rights.xml +9 -0
  268. data/tests/illformed/atom10/entry_rights_content_value.xml +9 -0
  269. data/tests/illformed/atom10/entry_rights_escaped_markup.xml +9 -0
  270. data/tests/illformed/atom10/entry_rights_inline_markup.xml +9 -0
  271. data/tests/illformed/atom10/entry_rights_inline_markup_2.xml +9 -0
  272. data/tests/illformed/atom10/entry_rights_text_plain.xml +9 -0
  273. data/tests/illformed/atom10/entry_rights_text_plain_brackets.xml +9 -0
  274. data/tests/illformed/atom10/entry_rights_type_default.xml +9 -0
  275. data/tests/illformed/atom10/entry_rights_type_text.xml +9 -0
  276. data/tests/illformed/atom10/entry_source_author_email.xml +15 -0
  277. data/tests/illformed/atom10/entry_source_author_map_author.xml +15 -0
  278. data/tests/illformed/atom10/entry_source_author_map_author_2.xml +14 -0
  279. data/tests/illformed/atom10/entry_source_author_name.xml +15 -0
  280. data/tests/illformed/atom10/entry_source_author_uri.xml +15 -0
  281. data/tests/illformed/atom10/entry_source_category_label.xml +11 -0
  282. data/tests/illformed/atom10/entry_source_category_scheme.xml +11 -0
  283. data/tests/illformed/atom10/entry_source_category_term.xml +11 -0
  284. data/tests/illformed/atom10/entry_source_contributor_email.xml +15 -0
  285. data/tests/illformed/atom10/entry_source_contributor_multiple.xml +20 -0
  286. data/tests/illformed/atom10/entry_source_contributor_name.xml +15 -0
  287. data/tests/illformed/atom10/entry_source_contributor_uri.xml +15 -0
  288. data/tests/illformed/atom10/entry_source_generator.xml +11 -0
  289. data/tests/illformed/atom10/entry_source_generator_name.xml +11 -0
  290. data/tests/illformed/atom10/entry_source_generator_uri.xml +11 -0
  291. data/tests/illformed/atom10/entry_source_generator_version.xml +11 -0
  292. data/tests/illformed/atom10/entry_source_icon.xml +11 -0
  293. data/tests/illformed/atom10/entry_source_id.xml +11 -0
  294. data/tests/illformed/atom10/entry_source_link_alternate_map_link.xml +11 -0
  295. data/tests/illformed/atom10/entry_source_link_alternate_map_link_2.xml +11 -0
  296. data/tests/illformed/atom10/entry_source_link_href.xml +11 -0
  297. data/tests/illformed/atom10/entry_source_link_hreflang.xml +11 -0
  298. data/tests/illformed/atom10/entry_source_link_length.xml +11 -0
  299. data/tests/illformed/atom10/entry_source_link_multiple.xml +12 -0
  300. data/tests/illformed/atom10/entry_source_link_no_rel.xml +11 -0
  301. data/tests/illformed/atom10/entry_source_link_rel.xml +11 -0
  302. data/tests/illformed/atom10/entry_source_link_rel_other.xml +11 -0
  303. data/tests/illformed/atom10/entry_source_link_rel_related.xml +11 -0
  304. data/tests/illformed/atom10/entry_source_link_rel_self.xml +11 -0
  305. data/tests/illformed/atom10/entry_source_link_rel_via.xml +11 -0
  306. data/tests/illformed/atom10/entry_source_link_title.xml +11 -0
  307. data/tests/illformed/atom10/entry_source_link_type.xml +11 -0
  308. data/tests/illformed/atom10/entry_source_logo.xml +11 -0
  309. data/tests/illformed/atom10/entry_source_rights.xml +11 -0
  310. data/tests/illformed/atom10/entry_source_rights_base64.xml +13 -0
  311. data/tests/illformed/atom10/entry_source_rights_base64_2.xml +13 -0
  312. data/tests/illformed/atom10/entry_source_rights_content_type.xml +11 -0
  313. data/tests/illformed/atom10/entry_source_rights_content_type_text.xml +11 -0
  314. data/tests/illformed/atom10/entry_source_rights_content_value.xml +11 -0
  315. data/tests/illformed/atom10/entry_source_rights_escaped_markup.xml +11 -0
  316. data/tests/illformed/atom10/entry_source_rights_inline_markup.xml +11 -0
  317. data/tests/illformed/atom10/entry_source_rights_inline_markup_2.xml +11 -0
  318. data/tests/illformed/atom10/entry_source_rights_text_plain.xml +11 -0
  319. data/tests/illformed/atom10/entry_source_subittle_content_type_text.xml +11 -0
  320. data/tests/illformed/atom10/entry_source_subtitle.xml +11 -0
  321. data/tests/illformed/atom10/entry_source_subtitle_base64.xml +13 -0
  322. data/tests/illformed/atom10/entry_source_subtitle_base64_2.xml +13 -0
  323. data/tests/illformed/atom10/entry_source_subtitle_content_type.xml +11 -0
  324. data/tests/illformed/atom10/entry_source_subtitle_content_value.xml +11 -0
  325. data/tests/illformed/atom10/entry_source_subtitle_escaped_markup.xml +11 -0
  326. data/tests/illformed/atom10/entry_source_subtitle_inline_markup.xml +11 -0
  327. data/tests/illformed/atom10/entry_source_subtitle_inline_markup_2.xml +11 -0
  328. data/tests/illformed/atom10/entry_source_subtitle_text_plain.xml +11 -0
  329. data/tests/illformed/atom10/entry_source_title.xml +11 -0
  330. data/tests/illformed/atom10/entry_source_title_base64.xml +13 -0
  331. data/tests/illformed/atom10/entry_source_title_base64_2.xml +13 -0
  332. data/tests/illformed/atom10/entry_source_title_content_type.xml +11 -0
  333. data/tests/illformed/atom10/entry_source_title_content_type_text.xml +11 -0
  334. data/tests/illformed/atom10/entry_source_title_content_value.xml +11 -0
  335. data/tests/illformed/atom10/entry_source_title_escaped_markup.xml +11 -0
  336. data/tests/illformed/atom10/entry_source_title_inline_markup.xml +11 -0
  337. data/tests/illformed/atom10/entry_source_title_inline_markup_2.xml +11 -0
  338. data/tests/illformed/atom10/entry_source_title_text_plain.xml +11 -0
  339. data/tests/illformed/atom10/entry_summary.xml +9 -0
  340. data/tests/illformed/atom10/entry_summary_base64.xml +11 -0
  341. data/tests/illformed/atom10/entry_summary_base64_2.xml +11 -0
  342. data/tests/illformed/atom10/entry_summary_content_value.xml +9 -0
  343. data/tests/illformed/atom10/entry_summary_escaped_markup.xml +9 -0
  344. data/tests/illformed/atom10/entry_summary_inline_markup.xml +9 -0
  345. data/tests/illformed/atom10/entry_summary_inline_markup_2.xml +9 -0
  346. data/tests/illformed/atom10/entry_summary_text_plain.xml +9 -0
  347. data/tests/illformed/atom10/entry_summary_type_default.xml +9 -0
  348. data/tests/illformed/atom10/entry_summary_type_text.xml +9 -0
  349. data/tests/illformed/atom10/entry_title.xml +9 -0
  350. data/tests/illformed/atom10/entry_title_base64.xml +11 -0
  351. data/tests/illformed/atom10/entry_title_base64_2.xml +11 -0
  352. data/tests/illformed/atom10/entry_title_content_value.xml +9 -0
  353. data/tests/illformed/atom10/entry_title_escaped_markup.xml +9 -0
  354. data/tests/illformed/atom10/entry_title_inline_markup.xml +9 -0
  355. data/tests/illformed/atom10/entry_title_inline_markup_2.xml +9 -0
  356. data/tests/illformed/atom10/entry_title_text_plain.xml +9 -0
  357. data/tests/illformed/atom10/entry_title_text_plain_brackets.xml +9 -0
  358. data/tests/illformed/atom10/entry_title_type_default.xml +9 -0
  359. data/tests/illformed/atom10/entry_title_type_text.xml +9 -0
  360. data/tests/illformed/atom10/feed_author_email.xml +11 -0
  361. data/tests/illformed/atom10/feed_author_map_author.xml +11 -0
  362. data/tests/illformed/atom10/feed_author_map_author_2.xml +10 -0
  363. data/tests/illformed/atom10/feed_author_name.xml +11 -0
  364. data/tests/illformed/atom10/feed_author_uri.xml +11 -0
  365. data/tests/illformed/atom10/feed_author_url.xml +11 -0
  366. data/tests/illformed/atom10/feed_contributor_email.xml +11 -0
  367. data/tests/illformed/atom10/feed_contributor_multiple.xml +16 -0
  368. data/tests/illformed/atom10/feed_contributor_name.xml +11 -0
  369. data/tests/illformed/atom10/feed_contributor_uri.xml +11 -0
  370. data/tests/illformed/atom10/feed_contributor_url.xml +11 -0
  371. data/tests/illformed/atom10/feed_generator.xml +7 -0
  372. data/tests/illformed/atom10/feed_generator_name.xml +7 -0
  373. data/tests/illformed/atom10/feed_generator_url.xml +7 -0
  374. data/tests/illformed/atom10/feed_generator_version.xml +7 -0
  375. data/tests/illformed/atom10/feed_icon.xml +7 -0
  376. data/tests/illformed/atom10/feed_id.xml +7 -0
  377. data/tests/illformed/atom10/feed_id_map_guid.xml +7 -0
  378. data/tests/illformed/atom10/feed_link_alternate_map_link.xml +7 -0
  379. data/tests/illformed/atom10/feed_link_alternate_map_link_2.xml +7 -0
  380. data/tests/illformed/atom10/feed_link_href.xml +7 -0
  381. data/tests/illformed/atom10/feed_link_hreflang.xml +7 -0
  382. data/tests/illformed/atom10/feed_link_length.xml +7 -0
  383. data/tests/illformed/atom10/feed_link_multiple.xml +8 -0
  384. data/tests/illformed/atom10/feed_link_no_rel.xml +7 -0
  385. data/tests/illformed/atom10/feed_link_rel.xml +7 -0
  386. data/tests/illformed/atom10/feed_link_rel_other.xml +7 -0
  387. data/tests/illformed/atom10/feed_link_rel_related.xml +7 -0
  388. data/tests/illformed/atom10/feed_link_rel_self.xml +7 -0
  389. data/tests/illformed/atom10/feed_link_rel_via.xml +7 -0
  390. data/tests/illformed/atom10/feed_link_title.xml +7 -0
  391. data/tests/illformed/atom10/feed_link_type.xml +7 -0
  392. data/tests/illformed/atom10/feed_logo.xml +7 -0
  393. data/tests/illformed/atom10/feed_rights.xml +7 -0
  394. data/tests/illformed/atom10/feed_rights_base64.xml +9 -0
  395. data/tests/illformed/atom10/feed_rights_base64_2.xml +9 -0
  396. data/tests/illformed/atom10/feed_rights_content_type.xml +7 -0
  397. data/tests/illformed/atom10/feed_rights_content_type_text.xml +7 -0
  398. data/tests/illformed/atom10/feed_rights_content_value.xml +7 -0
  399. data/tests/illformed/atom10/feed_rights_escaped_markup.xml +7 -0
  400. data/tests/illformed/atom10/feed_rights_inline_markup.xml +7 -0
  401. data/tests/illformed/atom10/feed_rights_inline_markup_2.xml +7 -0
  402. data/tests/illformed/atom10/feed_rights_text_plain.xml +7 -0
  403. data/tests/illformed/atom10/feed_subtitle.xml +7 -0
  404. data/tests/illformed/atom10/feed_subtitle_base64.xml +9 -0
  405. data/tests/illformed/atom10/feed_subtitle_base64_2.xml +9 -0
  406. data/tests/illformed/atom10/feed_subtitle_content_type.xml +7 -0
  407. data/tests/illformed/atom10/feed_subtitle_content_type_text.xml +7 -0
  408. data/tests/illformed/atom10/feed_subtitle_content_value.xml +7 -0
  409. data/tests/illformed/atom10/feed_subtitle_escaped_markup.xml +7 -0
  410. data/tests/illformed/atom10/feed_subtitle_inline_markup.xml +7 -0
  411. data/tests/illformed/atom10/feed_subtitle_inline_markup_2.xml +7 -0
  412. data/tests/illformed/atom10/feed_subtitle_text_plain.xml +7 -0
  413. data/tests/illformed/atom10/feed_title.xml +7 -0
  414. data/tests/illformed/atom10/feed_title_base64.xml +9 -0
  415. data/tests/illformed/atom10/feed_title_base64_2.xml +9 -0
  416. data/tests/illformed/atom10/feed_title_content_type.xml +7 -0
  417. data/tests/illformed/atom10/feed_title_content_type_text.xml +7 -0
  418. data/tests/illformed/atom10/feed_title_content_value.xml +7 -0
  419. data/tests/illformed/atom10/feed_title_escaped_markup.xml +7 -0
  420. data/tests/illformed/atom10/feed_title_inline_markup.xml +7 -0
  421. data/tests/illformed/atom10/feed_title_inline_markup_2.xml +7 -0
  422. data/tests/illformed/atom10/feed_title_text_plain.xml +7 -0
  423. data/tests/illformed/atom10/relative_uri.xml +7 -0
  424. data/tests/illformed/atom10/relative_uri_inherit.xml +7 -0
  425. data/tests/illformed/atom10/relative_uri_inherit_2.xml +7 -0
  426. data/tests/illformed/base/cdf_item_abstract_xml_base.xml +18 -0
  427. data/tests/illformed/base/entry_content_xml_base.xml +9 -0
  428. data/tests/illformed/base/entry_content_xml_base_inherit.xml +9 -0
  429. data/tests/illformed/base/entry_content_xml_base_inherit_2.xml +9 -0
  430. data/tests/illformed/base/entry_content_xml_base_inherit_3.xml +10 -0
  431. data/tests/illformed/base/entry_content_xml_base_inherit_4.xml +10 -0
  432. data/tests/illformed/base/entry_summary_xml_base.xml +9 -0
  433. data/tests/illformed/base/entry_summary_xml_base_inherit.xml +9 -0
  434. data/tests/illformed/base/entry_summary_xml_base_inherit_2.xml +9 -0
  435. data/tests/illformed/base/entry_summary_xml_base_inherit_3.xml +10 -0
  436. data/tests/illformed/base/entry_summary_xml_base_inherit_4.xml +10 -0
  437. data/tests/illformed/base/entry_title_xml_base.xml +9 -0
  438. data/tests/illformed/base/entry_title_xml_base_inherit.xml +9 -0
  439. data/tests/illformed/base/entry_title_xml_base_inherit_2.xml +9 -0
  440. data/tests/illformed/base/entry_title_xml_base_inherit_3.xml +10 -0
  441. data/tests/illformed/base/entry_title_xml_base_inherit_4.xml +10 -0
  442. data/tests/illformed/base/feed_copyright_xml_base.xml +7 -0
  443. data/tests/illformed/base/feed_copyright_xml_base_inherit.xml +7 -0
  444. data/tests/illformed/base/feed_copyright_xml_base_inherit_2.xml +7 -0
  445. data/tests/illformed/base/feed_copyright_xml_base_inherit_3.xml +8 -0
  446. data/tests/illformed/base/feed_copyright_xml_base_inherit_4.xml +8 -0
  447. data/tests/illformed/base/feed_info_xml_base.xml +7 -0
  448. data/tests/illformed/base/feed_info_xml_base_inherit.xml +7 -0
  449. data/tests/illformed/base/feed_info_xml_base_inherit_2.xml +7 -0
  450. data/tests/illformed/base/feed_info_xml_base_inherit_3.xml +8 -0
  451. data/tests/illformed/base/feed_info_xml_base_inherit_4.xml +8 -0
  452. data/tests/illformed/base/feed_tagline_xml_base.xml +7 -0
  453. data/tests/illformed/base/feed_tagline_xml_base_inherit.xml +7 -0
  454. data/tests/illformed/base/feed_tagline_xml_base_inherit_2.xml +7 -0
  455. data/tests/illformed/base/feed_tagline_xml_base_inherit_3.xml +8 -0
  456. data/tests/illformed/base/feed_tagline_xml_base_inherit_4.xml +8 -0
  457. data/tests/illformed/base/feed_title_xml_base.xml +7 -0
  458. data/tests/illformed/base/feed_title_xml_base_inherit.xml +7 -0
  459. data/tests/illformed/base/feed_title_xml_base_inherit_2.xml +7 -0
  460. data/tests/illformed/base/feed_title_xml_base_inherit_3.xml +8 -0
  461. data/tests/illformed/base/feed_title_xml_base_inherit_4.xml +8 -0
  462. data/tests/illformed/base/http_channel_docs_base_content_location.xml +10 -0
  463. data/tests/illformed/base/http_channel_docs_base_docuri.xml +9 -0
  464. data/tests/illformed/base/http_channel_link_base_content_location.xml +10 -0
  465. data/tests/illformed/base/http_channel_link_base_docuri.xml +9 -0
  466. data/tests/illformed/base/http_entry_author_url_base_content_location.xml +12 -0
  467. data/tests/illformed/base/http_entry_author_url_base_docuri.xml +11 -0
  468. data/tests/illformed/base/http_entry_content_base64_base_content_location.xml +12 -0
  469. data/tests/illformed/base/http_entry_content_base64_base_docuri.xml +11 -0
  470. data/tests/illformed/base/http_entry_content_base_content_location.xml +10 -0
  471. data/tests/illformed/base/http_entry_content_base_docuri.xml +9 -0
  472. data/tests/illformed/base/http_entry_content_inline_base_content_location.xml +10 -0
  473. data/tests/illformed/base/http_entry_content_inline_base_docuri.xml +9 -0
  474. data/tests/illformed/base/http_entry_contributor_url_base_content_location.xml +12 -0
  475. data/tests/illformed/base/http_entry_contributor_url_base_docuri.xml +11 -0
  476. data/tests/illformed/base/http_entry_id_base_content_location.xml +10 -0
  477. data/tests/illformed/base/http_entry_id_base_docuri.xml +9 -0
  478. data/tests/illformed/base/http_entry_link_base_content_location.xml +10 -0
  479. data/tests/illformed/base/http_entry_link_base_docuri.xml +9 -0
  480. data/tests/illformed/base/http_entry_summary_base64_base_content_location.xml +12 -0
  481. data/tests/illformed/base/http_entry_summary_base64_base_docuri.xml +11 -0
  482. data/tests/illformed/base/http_entry_summary_base_content_location.xml +10 -0
  483. data/tests/illformed/base/http_entry_summary_base_docuri.xml +9 -0
  484. data/tests/illformed/base/http_entry_summary_inline_base_content_location.xml +10 -0
  485. data/tests/illformed/base/http_entry_summary_inline_base_docuri.xml +9 -0
  486. data/tests/illformed/base/http_entry_title_base64_base_content_location.xml +12 -0
  487. data/tests/illformed/base/http_entry_title_base64_base_docuri.xml +11 -0
  488. data/tests/illformed/base/http_entry_title_base_content_location.xml +10 -0
  489. data/tests/illformed/base/http_entry_title_base_docuri.xml +9 -0
  490. data/tests/illformed/base/http_entry_title_inline_base_content_location.xml +10 -0
  491. data/tests/illformed/base/http_entry_title_inline_base_docuri.xml +9 -0
  492. data/tests/illformed/base/http_feed_author_url_base_content_location.xml +10 -0
  493. data/tests/illformed/base/http_feed_author_url_base_docuri.xml +9 -0
  494. data/tests/illformed/base/http_feed_contributor_url_base_content_location.xml +10 -0
  495. data/tests/illformed/base/http_feed_contributor_url_base_docuri.xml +9 -0
  496. data/tests/illformed/base/http_feed_copyright_base64_base_content_location.xml +10 -0
  497. data/tests/illformed/base/http_feed_copyright_base64_base_docuri.xml +9 -0
  498. data/tests/illformed/base/http_feed_copyright_base_content_location.xml +8 -0
  499. data/tests/illformed/base/http_feed_copyright_base_docuri.xml +7 -0
  500. data/tests/illformed/base/http_feed_copyright_inline_base_content_location.xml +8 -0
  501. data/tests/illformed/base/http_feed_copyright_inline_base_docuri.xml +7 -0
  502. data/tests/illformed/base/http_feed_generator_url_base_content_location.xml +8 -0
  503. data/tests/illformed/base/http_feed_generator_url_base_docuri.xml +7 -0
  504. data/tests/illformed/base/http_feed_id_base_content_location.xml +8 -0
  505. data/tests/illformed/base/http_feed_id_base_docuri.xml +7 -0
  506. data/tests/illformed/base/http_feed_info_base64_base_content_location.xml +10 -0
  507. data/tests/illformed/base/http_feed_info_base64_base_docuri.xml +9 -0
  508. data/tests/illformed/base/http_feed_info_base_content_location.xml +8 -0
  509. data/tests/illformed/base/http_feed_info_base_docuri.xml +7 -0
  510. data/tests/illformed/base/http_feed_info_inline_base_content_location.xml +8 -0
  511. data/tests/illformed/base/http_feed_info_inline_base_docuri.xml +7 -0
  512. data/tests/illformed/base/http_feed_link_base_content_location.xml +8 -0
  513. data/tests/illformed/base/http_feed_link_base_docuri.xml +7 -0
  514. data/tests/illformed/base/http_feed_tagline_base64_base_content_location.xml +10 -0
  515. data/tests/illformed/base/http_feed_tagline_base64_base_docuri.xml +9 -0
  516. data/tests/illformed/base/http_feed_tagline_base_content_location.xml +8 -0
  517. data/tests/illformed/base/http_feed_tagline_base_docuri.xml +7 -0
  518. data/tests/illformed/base/http_feed_tagline_inline_base_content_location.xml +8 -0
  519. data/tests/illformed/base/http_feed_tagline_inline_base_docuri.xml +7 -0
  520. data/tests/illformed/base/http_feed_title_base64_base_content_location.xml +10 -0
  521. data/tests/illformed/base/http_feed_title_base64_base_docuri.xml +9 -0
  522. data/tests/illformed/base/http_feed_title_base_content_location.xml +8 -0
  523. data/tests/illformed/base/http_feed_title_base_docuri.xml +7 -0
  524. data/tests/illformed/base/http_feed_title_inline_base_content_location.xml +8 -0
  525. data/tests/illformed/base/http_feed_title_inline_base_docuri.xml +7 -0
  526. data/tests/illformed/base/http_item_body_base_content_location.xml +12 -0
  527. data/tests/illformed/base/http_item_body_base_docuri.xml +11 -0
  528. data/tests/illformed/base/http_item_comments_base_content_location.xml +12 -0
  529. data/tests/illformed/base/http_item_comments_base_docuri.xml +11 -0
  530. data/tests/illformed/base/http_item_content_encoded_base_content_location.xml +12 -0
  531. data/tests/illformed/base/http_item_content_encoded_base_docuri.xml +11 -0
  532. data/tests/illformed/base/http_item_description_base_content_location.xml +12 -0
  533. data/tests/illformed/base/http_item_description_base_docuri.xml +11 -0
  534. data/tests/illformed/base/http_item_fullitem_base_content_location.xml +12 -0
  535. data/tests/illformed/base/http_item_fullitem_base_docuri.xml +11 -0
  536. data/tests/illformed/base/http_item_link_base_content_location.xml +12 -0
  537. data/tests/illformed/base/http_item_link_base_docuri.xml +11 -0
  538. data/tests/illformed/base/http_item_wfw_commentRSS_base_content_location.xml +12 -0
  539. data/tests/illformed/base/http_item_wfw_commentRSS_base_docuri.xml +11 -0
  540. data/tests/illformed/base/http_item_wfw_comment_base_content_location.xml +12 -0
  541. data/tests/illformed/base/http_item_wfw_comment_base_docuri.xml +11 -0
  542. data/tests/illformed/base/http_item_xhtml_body_base_content_location.xml +12 -0
  543. data/tests/illformed/base/http_item_xhtml_body_base_docuri.xml +11 -0
  544. data/tests/illformed/base/http_relative_xml_base.xml +10 -0
  545. data/tests/illformed/base/malformed_base.xml +9 -0
  546. data/tests/illformed/base/relative_xml_base.xml +9 -0
  547. data/tests/illformed/base/relative_xml_base_2.xml +9 -0
  548. data/tests/illformed/cdf/channel_abstract_map_description.xml +7 -0
  549. data/tests/illformed/cdf/channel_abstract_map_tagline.xml +7 -0
  550. data/tests/illformed/cdf/channel_href_map_link.xml +6 -0
  551. data/tests/illformed/cdf/channel_href_map_links.xml +6 -0
  552. data/tests/illformed/cdf/channel_title.xml +7 -0
  553. data/tests/illformed/cdf/item_abstract_map_description.xml +9 -0
  554. data/tests/illformed/cdf/item_abstract_map_summary.xml +9 -0
  555. data/tests/illformed/cdf/item_href_map_link.xml +8 -0
  556. data/tests/illformed/cdf/item_href_map_links.xml +8 -0
  557. data/tests/illformed/cdf/item_title.xml +9 -0
  558. data/tests/illformed/chardet/big5.xml +8 -0
  559. data/tests/illformed/chardet/eucjp.xml +13 -0
  560. data/tests/illformed/chardet/euckr.xml +13 -0
  561. data/tests/illformed/chardet/gb2312.xml +12 -0
  562. data/tests/illformed/chardet/koi8r.xml +14 -0
  563. data/tests/illformed/chardet/shiftjis.xml +11 -0
  564. data/tests/illformed/chardet/tis620.xml +12 -0
  565. data/tests/illformed/chardet/windows1255.xml +14 -0
  566. data/tests/illformed/date/cdf_channel_lastmod_map_date.xml +6 -0
  567. data/tests/illformed/date/cdf_channel_lastmod_map_modified.xml +6 -0
  568. data/tests/illformed/date/cdf_channel_lastmod_map_modified_parsed.xml +6 -0
  569. data/tests/illformed/date/cdf_item_lastmod_map_date.xml +8 -0
  570. data/tests/illformed/date/cdf_item_lastmod_map_modified.xml +8 -0
  571. data/tests/illformed/date/cdf_item_lastmod_map_modified_parsed.xml +8 -0
  572. data/tests/illformed/date/channel_dc_date.xml +9 -0
  573. data/tests/illformed/date/channel_dc_date_map_modified.xml +9 -0
  574. data/tests/illformed/date/channel_dc_date_w3dtf_utc.xml +9 -0
  575. data/tests/illformed/date/channel_dc_date_w3dtf_utc_map_modified_parsed.xml +9 -0
  576. data/tests/illformed/date/channel_dcterms_created.xml +9 -0
  577. data/tests/illformed/date/channel_dcterms_created_w3dtf_utc.xml +9 -0
  578. data/tests/illformed/date/channel_dcterms_issued.xml +9 -0
  579. data/tests/illformed/date/channel_dcterms_issued_w3dtf_utc.xml +9 -0
  580. data/tests/illformed/date/channel_dcterms_modified.xml +9 -0
  581. data/tests/illformed/date/channel_dcterms_modified_map_date.xml +9 -0
  582. data/tests/illformed/date/channel_dcterms_modified_w3dtf_utc.xml +9 -0
  583. data/tests/illformed/date/channel_dcterms_modified_w3dtf_utc_map_date.xml +9 -0
  584. data/tests/illformed/date/channel_pubDate.xml +9 -0
  585. data/tests/illformed/date/channel_pubDate_asctime.xml +9 -0
  586. data/tests/illformed/date/channel_pubDate_disney.xml +9 -0
  587. data/tests/illformed/date/channel_pubDate_disney_at.xml +9 -0
  588. data/tests/illformed/date/channel_pubDate_disney_ct.xml +9 -0
  589. data/tests/illformed/date/channel_pubDate_disney_mt.xml +9 -0
  590. data/tests/illformed/date/channel_pubDate_disney_pt.xml +9 -0
  591. data/tests/illformed/date/channel_pubDate_greek_1.xml +9 -0
  592. data/tests/illformed/date/channel_pubDate_hungarian_1.xml +9 -0
  593. data/tests/illformed/date/channel_pubDate_iso8601_ym.xml +9 -0
  594. data/tests/illformed/date/channel_pubDate_iso8601_ym_2.xml +9 -0
  595. data/tests/illformed/date/channel_pubDate_iso8601_ymd.xml +9 -0
  596. data/tests/illformed/date/channel_pubDate_iso8601_ymd_2.xml +9 -0
  597. data/tests/illformed/date/channel_pubDate_iso8601_yo_2.xml +9 -0
  598. data/tests/illformed/date/channel_pubDate_korean_nate.xml +11 -0
  599. data/tests/illformed/date/channel_pubDate_map_modified.xml +9 -0
  600. data/tests/illformed/date/channel_pubDate_mssql.xml +9 -0
  601. data/tests/illformed/date/channel_pubDate_mssql_nofraction.xml +9 -0
  602. data/tests/illformed/date/channel_pubDate_nosecond.xml +9 -0
  603. data/tests/illformed/date/channel_pubDate_notime.xml +9 -0
  604. data/tests/illformed/date/channel_pubDate_rfc2822.xml +9 -0
  605. data/tests/illformed/date/channel_pubDate_rfc2822_rollover_june_31.xml +9 -0
  606. data/tests/illformed/date/channel_pubDate_rfc822.xml +9 -0
  607. data/tests/illformed/date/channel_pubDate_w3dtf_rollover_61m.xml +9 -0
  608. data/tests/illformed/date/channel_pubDate_w3dtf_rollover_61s.xml +9 -0
  609. data/tests/illformed/date/channel_pubDate_w3dtf_rollover_leapyear.xml +9 -0
  610. data/tests/illformed/date/channel_pubDate_w3dtf_rollover_leapyear400.xml +9 -0
  611. data/tests/illformed/date/channel_pubDate_w3dtf_rollover_nonleapyear.xml +9 -0
  612. data/tests/illformed/date/channel_pubDate_w3dtf_sf.xml +9 -0
  613. data/tests/illformed/date/channel_pubDate_w3dtf_tokyo.xml +9 -0
  614. data/tests/illformed/date/channel_pubDate_w3dtf_utc.xml +9 -0
  615. data/tests/illformed/date/channel_pubDate_w3dtf_y.xml +9 -0
  616. data/tests/illformed/date/channel_pubDate_w3dtf_ym.xml +9 -0
  617. data/tests/illformed/date/channel_pubDate_w3dtf_ymd.xml +9 -0
  618. data/tests/illformed/date/channel_pubDate_w3dtf_ymd_2.xml +9 -0
  619. data/tests/illformed/date/entry_created.xml +9 -0
  620. data/tests/illformed/date/entry_created_w3dtf_utc.xml +9 -0
  621. data/tests/illformed/date/entry_issued.xml +9 -0
  622. data/tests/illformed/date/entry_issued_w3dtf_utc.xml +9 -0
  623. data/tests/illformed/date/entry_modified.xml +9 -0
  624. data/tests/illformed/date/entry_modified_map_date.xml +9 -0
  625. data/tests/illformed/date/entry_modified_w3dtf_utc.xml +9 -0
  626. data/tests/illformed/date/entry_published_w3dtf_utc.xml +9 -0
  627. data/tests/illformed/date/entry_source_updated_w3dtf_utc.xml +11 -0
  628. data/tests/illformed/date/entry_updated_w3dtf_utc.xml +9 -0
  629. data/tests/illformed/date/feed_modified.xml +9 -0
  630. data/tests/illformed/date/feed_modified_asctime.xml +9 -0
  631. data/tests/illformed/date/feed_modified_disney.xml +7 -0
  632. data/tests/illformed/date/feed_modified_disney_at.xml +7 -0
  633. data/tests/illformed/date/feed_modified_disney_ct.xml +7 -0
  634. data/tests/illformed/date/feed_modified_disney_mt.xml +7 -0
  635. data/tests/illformed/date/feed_modified_disney_pt.xml +7 -0
  636. data/tests/illformed/date/feed_modified_iso8601_ym.xml +9 -0
  637. data/tests/illformed/date/feed_modified_iso8601_ym_2.xml +9 -0
  638. data/tests/illformed/date/feed_modified_iso8601_ymd.xml +9 -0
  639. data/tests/illformed/date/feed_modified_iso8601_ymd_2.xml +9 -0
  640. data/tests/illformed/date/feed_modified_iso8601_yo_2.xml +9 -0
  641. data/tests/illformed/date/feed_modified_map_date.xml +9 -0
  642. data/tests/illformed/date/feed_modified_rfc2822.xml +9 -0
  643. data/tests/illformed/date/feed_modified_rfc2822_rollover_june_31.xml +9 -0
  644. data/tests/illformed/date/feed_modified_rfc822.xml +9 -0
  645. data/tests/illformed/date/feed_modified_w3dtf_rollover_leapyear.xml +9 -0
  646. data/tests/illformed/date/feed_modified_w3dtf_rollover_leapyear400.xml +9 -0
  647. data/tests/illformed/date/feed_modified_w3dtf_rollover_nonleapyear.xml +9 -0
  648. data/tests/illformed/date/feed_modified_w3dtf_sf.xml +9 -0
  649. data/tests/illformed/date/feed_modified_w3dtf_tokyo.xml +9 -0
  650. data/tests/illformed/date/feed_modified_w3dtf_utc.xml +9 -0
  651. data/tests/illformed/date/feed_modified_w3dtf_y.xml +9 -0
  652. data/tests/illformed/date/feed_modified_w3dtf_ym.xml +9 -0
  653. data/tests/illformed/date/feed_modified_w3dtf_ymd.xml +9 -0
  654. data/tests/illformed/date/feed_modified_w3dtf_ymd_2.xml +9 -0
  655. data/tests/illformed/date/feed_updated_w3dtf_utc.xml +7 -0
  656. data/tests/illformed/date/http_high_bit_date.xml +12 -0
  657. data/tests/illformed/date/item_dc_date.xml +11 -0
  658. data/tests/illformed/date/item_dc_date_map_modified.xml +11 -0
  659. data/tests/illformed/date/item_dc_date_w3dtf_utc.xml +11 -0
  660. data/tests/illformed/date/item_dc_date_w3dtf_utc_map_modified_parsed.xml +11 -0
  661. data/tests/illformed/date/item_dcterms_created.xml +11 -0
  662. data/tests/illformed/date/item_dcterms_created_w3dtf_utc.xml +11 -0
  663. data/tests/illformed/date/item_dcterms_issued.xml +11 -0
  664. data/tests/illformed/date/item_dcterms_issued_w3dtf_utc.xml +11 -0
  665. data/tests/illformed/date/item_dcterms_modified.xml +11 -0
  666. data/tests/illformed/date/item_dcterms_modified_map_date.xml +11 -0
  667. data/tests/illformed/date/item_dcterms_modified_w3dtf_utc.xml +11 -0
  668. data/tests/illformed/date/item_dcterms_modified_w3dtf_utc_map_date.xml +11 -0
  669. data/tests/illformed/date/item_expirationDate.xml +11 -0
  670. data/tests/illformed/date/item_expirationDate_rfc2822.xml +11 -0
  671. data/tests/illformed/date/item_pubDate.xml +11 -0
  672. data/tests/illformed/date/item_pubDate_euc-kr.xml +13 -0
  673. data/tests/illformed/date/item_pubDate_map_modified.xml +11 -0
  674. data/tests/illformed/date/item_pubDate_rfc2822.xml +11 -0
  675. data/tests/illformed/encoding/bogus_encoding.xml +7 -0
  676. data/tests/illformed/encoding/encoding_mismatch_crash.xml +10 -0
  677. data/tests/illformed/encoding/http_i18n.xml +13 -0
  678. data/tests/illformed/encoding/http_text_plain.xml +8 -0
  679. data/tests/illformed/encoding/http_text_plain_charset.xml +8 -0
  680. data/tests/illformed/encoding/utf-16be-autodetect.xml +0 -0
  681. data/tests/illformed/encoding/utf-16be-bom.xml +0 -0
  682. data/tests/illformed/encoding/utf-16be.xml +0 -0
  683. data/tests/illformed/encoding/utf-16le-autodetect.xml +0 -0
  684. data/tests/illformed/encoding/utf-16le-bom.xml +0 -0
  685. data/tests/illformed/encoding/utf-16le.xml +0 -0
  686. data/tests/illformed/encoding/utf-32be-autodetect.xml +0 -0
  687. data/tests/illformed/encoding/utf-32be-bom.xml +0 -0
  688. data/tests/illformed/encoding/utf-32be.xml +0 -0
  689. data/tests/illformed/encoding/utf-32le-autodetect.xml +0 -0
  690. data/tests/illformed/encoding/utf-32le-bom.xml +0 -0
  691. data/tests/illformed/encoding/utf-32le.xml +0 -0
  692. data/tests/illformed/encoding/utf-8-bom.xml +8 -0
  693. data/tests/illformed/encoding/x80_437.xml +9 -0
  694. data/tests/illformed/encoding/x80_850.xml +9 -0
  695. data/tests/illformed/encoding/x80_852.xml +9 -0
  696. data/tests/illformed/encoding/x80_855.xml +9 -0
  697. data/tests/illformed/encoding/x80_857.xml +9 -0
  698. data/tests/illformed/encoding/x80_860.xml +9 -0
  699. data/tests/illformed/encoding/x80_861.xml +9 -0
  700. data/tests/illformed/encoding/x80_862.xml +9 -0
  701. data/tests/illformed/encoding/x80_863.xml +9 -0
  702. data/tests/illformed/encoding/x80_865.xml +9 -0
  703. data/tests/illformed/encoding/x80_866.xml +9 -0
  704. data/tests/illformed/encoding/x80_cp037.xml +1 -0
  705. data/tests/illformed/encoding/x80_cp1125.xml +9 -0
  706. data/tests/illformed/encoding/x80_cp1250.xml +9 -0
  707. data/tests/illformed/encoding/x80_cp1251.xml +9 -0
  708. data/tests/illformed/encoding/x80_cp1252.xml +9 -0
  709. data/tests/illformed/encoding/x80_cp1253.xml +9 -0
  710. data/tests/illformed/encoding/x80_cp1254.xml +9 -0
  711. data/tests/illformed/encoding/x80_cp1255.xml +9 -0
  712. data/tests/illformed/encoding/x80_cp1256.xml +9 -0
  713. data/tests/illformed/encoding/x80_cp1257.xml +9 -0
  714. data/tests/illformed/encoding/x80_cp1258.xml +9 -0
  715. data/tests/illformed/encoding/x80_cp437.xml +9 -0
  716. data/tests/illformed/encoding/x80_cp500.xml +1 -0
  717. data/tests/illformed/encoding/x80_cp737.xml +9 -0
  718. data/tests/illformed/encoding/x80_cp775.xml +9 -0
  719. data/tests/illformed/encoding/x80_cp850.xml +9 -0
  720. data/tests/illformed/encoding/x80_cp852.xml +9 -0
  721. data/tests/illformed/encoding/x80_cp855.xml +9 -0
  722. data/tests/illformed/encoding/x80_cp856.xml +9 -0
  723. data/tests/illformed/encoding/x80_cp857.xml +9 -0
  724. data/tests/illformed/encoding/x80_cp860.xml +9 -0
  725. data/tests/illformed/encoding/x80_cp861.xml +9 -0
  726. data/tests/illformed/encoding/x80_cp862.xml +9 -0
  727. data/tests/illformed/encoding/x80_cp863.xml +9 -0
  728. data/tests/illformed/encoding/x80_cp864.xml +9 -0
  729. data/tests/illformed/encoding/x80_cp865.xml +9 -0
  730. data/tests/illformed/encoding/x80_cp866.xml +9 -0
  731. data/tests/illformed/encoding/x80_cp874.xml +9 -0
  732. data/tests/illformed/encoding/x80_cp875.xml +1 -0
  733. data/tests/illformed/encoding/x80_cp_is.xml +9 -0
  734. data/tests/illformed/encoding/x80_csibm037.xml +1 -0
  735. data/tests/illformed/encoding/x80_csibm500.xml +1 -0
  736. data/tests/illformed/encoding/x80_csibm855.xml +9 -0
  737. data/tests/illformed/encoding/x80_csibm857.xml +9 -0
  738. data/tests/illformed/encoding/x80_csibm860.xml +9 -0
  739. data/tests/illformed/encoding/x80_csibm861.xml +9 -0
  740. data/tests/illformed/encoding/x80_csibm863.xml +9 -0
  741. data/tests/illformed/encoding/x80_csibm864.xml +9 -0
  742. data/tests/illformed/encoding/x80_csibm865.xml +9 -0
  743. data/tests/illformed/encoding/x80_csibm866.xml +9 -0
  744. data/tests/illformed/encoding/x80_cskoi8r.xml +9 -0
  745. data/tests/illformed/encoding/x80_csmacintosh.xml +9 -0
  746. data/tests/illformed/encoding/x80_cspc775baltic.xml +9 -0
  747. data/tests/illformed/encoding/x80_cspc850multilingual.xml +9 -0
  748. data/tests/illformed/encoding/x80_cspc862latinhebrew.xml +9 -0
  749. data/tests/illformed/encoding/x80_cspc8codepage437.xml +9 -0
  750. data/tests/illformed/encoding/x80_cspcp852.xml +9 -0
  751. data/tests/illformed/encoding/x80_dbcs.xml +9 -0
  752. data/tests/illformed/encoding/x80_ebcdic-cp-be.xml +1 -0
  753. data/tests/illformed/encoding/x80_ebcdic-cp-ca.xml +1 -0
  754. data/tests/illformed/encoding/x80_ebcdic-cp-ch.xml +1 -0
  755. data/tests/illformed/encoding/x80_ebcdic-cp-nl.xml +1 -0
  756. data/tests/illformed/encoding/x80_ebcdic-cp-us.xml +1 -0
  757. data/tests/illformed/encoding/x80_ebcdic-cp-wt.xml +1 -0
  758. data/tests/illformed/encoding/x80_ebcdic_cp_be.xml +1 -0
  759. data/tests/illformed/encoding/x80_ebcdic_cp_ca.xml +1 -0
  760. data/tests/illformed/encoding/x80_ebcdic_cp_ch.xml +1 -0
  761. data/tests/illformed/encoding/x80_ebcdic_cp_nl.xml +1 -0
  762. data/tests/illformed/encoding/x80_ebcdic_cp_us.xml +1 -0
  763. data/tests/illformed/encoding/x80_ebcdic_cp_wt.xml +1 -0
  764. data/tests/illformed/encoding/x80_ibm037.xml +1 -0
  765. data/tests/illformed/encoding/x80_ibm039.xml +1 -0
  766. data/tests/illformed/encoding/x80_ibm1140.xml +1 -0
  767. data/tests/illformed/encoding/x80_ibm437.xml +9 -0
  768. data/tests/illformed/encoding/x80_ibm500.xml +1 -0
  769. data/tests/illformed/encoding/x80_ibm775.xml +9 -0
  770. data/tests/illformed/encoding/x80_ibm850.xml +9 -0
  771. data/tests/illformed/encoding/x80_ibm852.xml +9 -0
  772. data/tests/illformed/encoding/x80_ibm855.xml +9 -0
  773. data/tests/illformed/encoding/x80_ibm857.xml +9 -0
  774. data/tests/illformed/encoding/x80_ibm860.xml +9 -0
  775. data/tests/illformed/encoding/x80_ibm861.xml +9 -0
  776. data/tests/illformed/encoding/x80_ibm862.xml +9 -0
  777. data/tests/illformed/encoding/x80_ibm863.xml +9 -0
  778. data/tests/illformed/encoding/x80_ibm864.xml +9 -0
  779. data/tests/illformed/encoding/x80_ibm865.xml +9 -0
  780. data/tests/illformed/encoding/x80_ibm866.xml +9 -0
  781. data/tests/illformed/encoding/x80_koi8-r.xml +9 -0
  782. data/tests/illformed/encoding/x80_koi8-t.xml +9 -0
  783. data/tests/illformed/encoding/x80_koi8-u.xml +9 -0
  784. data/tests/illformed/encoding/x80_mac-cyrillic.xml +9 -0
  785. data/tests/illformed/encoding/x80_mac.xml +9 -0
  786. data/tests/illformed/encoding/x80_maccentraleurope.xml +9 -0
  787. data/tests/illformed/encoding/x80_maccyrillic.xml +9 -0
  788. data/tests/illformed/encoding/x80_macgreek.xml +9 -0
  789. data/tests/illformed/encoding/x80_maciceland.xml +9 -0
  790. data/tests/illformed/encoding/x80_macintosh.xml +9 -0
  791. data/tests/illformed/encoding/x80_maclatin2.xml +9 -0
  792. data/tests/illformed/encoding/x80_macroman.xml +9 -0
  793. data/tests/illformed/encoding/x80_macturkish.xml +9 -0
  794. data/tests/illformed/encoding/x80_ms-ansi.xml +9 -0
  795. data/tests/illformed/encoding/x80_ms-arab.xml +9 -0
  796. data/tests/illformed/encoding/x80_ms-cyrl.xml +9 -0
  797. data/tests/illformed/encoding/x80_ms-ee.xml +9 -0
  798. data/tests/illformed/encoding/x80_ms-greek.xml +9 -0
  799. data/tests/illformed/encoding/x80_ms-hebr.xml +9 -0
  800. data/tests/illformed/encoding/x80_ms-turk.xml +9 -0
  801. data/tests/illformed/encoding/x80_tcvn-5712.xml +9 -0
  802. data/tests/illformed/encoding/x80_tcvn.xml +9 -0
  803. data/tests/illformed/encoding/x80_tcvn5712-1.xml +9 -0
  804. data/tests/illformed/encoding/x80_viscii.xml +9 -0
  805. data/tests/illformed/encoding/x80_winbaltrim.xml +9 -0
  806. data/tests/illformed/encoding/x80_windows-1250.xml +9 -0
  807. data/tests/illformed/encoding/x80_windows-1251.xml +9 -0
  808. data/tests/illformed/encoding/x80_windows-1252.xml +9 -0
  809. data/tests/illformed/encoding/x80_windows-1253.xml +9 -0
  810. data/tests/illformed/encoding/x80_windows-1254.xml +9 -0
  811. data/tests/illformed/encoding/x80_windows-1255.xml +9 -0
  812. data/tests/illformed/encoding/x80_windows-1256.xml +9 -0
  813. data/tests/illformed/encoding/x80_windows-1257.xml +9 -0
  814. data/tests/illformed/encoding/x80_windows-1258.xml +9 -0
  815. data/tests/illformed/encoding/x80_windows_1250.xml +9 -0
  816. data/tests/illformed/encoding/x80_windows_1251.xml +9 -0
  817. data/tests/illformed/encoding/x80_windows_1252.xml +9 -0
  818. data/tests/illformed/encoding/x80_windows_1253.xml +9 -0
  819. data/tests/illformed/encoding/x80_windows_1254.xml +9 -0
  820. data/tests/illformed/encoding/x80_windows_1255.xml +9 -0
  821. data/tests/illformed/encoding/x80_windows_1256.xml +9 -0
  822. data/tests/illformed/encoding/x80_windows_1257.xml +9 -0
  823. data/tests/illformed/encoding/x80_windows_1258.xml +9 -0
  824. data/tests/illformed/entities/160.xml +9 -0
  825. data/tests/illformed/entities/732.xml +9 -0
  826. data/tests/illformed/entities/8216.xml +9 -0
  827. data/tests/illformed/entities/8217.xml +9 -0
  828. data/tests/illformed/entities/8220.xml +9 -0
  829. data/tests/illformed/entities/8221.xml +9 -0
  830. data/tests/illformed/entities/9830.xml +9 -0
  831. data/tests/illformed/entities/aacute.xml +9 -0
  832. data/tests/illformed/entities/acirc.xml +9 -0
  833. data/tests/illformed/entities/acute.xml +9 -0
  834. data/tests/illformed/entities/aelig.xml +9 -0
  835. data/tests/illformed/entities/agrave.xml +9 -0
  836. data/tests/illformed/entities/alefsym.xml +9 -0
  837. data/tests/illformed/entities/alpha.xml +9 -0
  838. data/tests/illformed/entities/and.xml +9 -0
  839. data/tests/illformed/entities/ang.xml +9 -0
  840. data/tests/illformed/entities/aring.xml +9 -0
  841. data/tests/illformed/entities/asymp.xml +9 -0
  842. data/tests/illformed/entities/atilde.xml +9 -0
  843. data/tests/illformed/entities/auml.xml +9 -0
  844. data/tests/illformed/entities/bdquo.xml +9 -0
  845. data/tests/illformed/entities/beta.xml +9 -0
  846. data/tests/illformed/entities/brvbar.xml +9 -0
  847. data/tests/illformed/entities/bull.xml +9 -0
  848. data/tests/illformed/entities/cap.xml +9 -0
  849. data/tests/illformed/entities/ccedil.xml +9 -0
  850. data/tests/illformed/entities/cedil.xml +9 -0
  851. data/tests/illformed/entities/cent.xml +9 -0
  852. data/tests/illformed/entities/chi.xml +9 -0
  853. data/tests/illformed/entities/circ.xml +9 -0
  854. data/tests/illformed/entities/clubs.xml +9 -0
  855. data/tests/illformed/entities/cong.xml +9 -0
  856. data/tests/illformed/entities/copy.xml +9 -0
  857. data/tests/illformed/entities/crarr.xml +9 -0
  858. data/tests/illformed/entities/cup.xml +9 -0
  859. data/tests/illformed/entities/curren.xml +9 -0
  860. data/tests/illformed/entities/dagger.xml +9 -0
  861. data/tests/illformed/entities/darr.xml +9 -0
  862. data/tests/illformed/entities/deg.xml +9 -0
  863. data/tests/illformed/entities/delta.xml +9 -0
  864. data/tests/illformed/entities/diams.xml +9 -0
  865. data/tests/illformed/entities/divide.xml +9 -0
  866. data/tests/illformed/entities/doesnotexist.xml +9 -0
  867. data/tests/illformed/entities/eacute.xml +9 -0
  868. data/tests/illformed/entities/ecirc.xml +9 -0
  869. data/tests/illformed/entities/egrave.xml +9 -0
  870. data/tests/illformed/entities/empty.xml +9 -0
  871. data/tests/illformed/entities/emsp.xml +9 -0
  872. data/tests/illformed/entities/ensp.xml +9 -0
  873. data/tests/illformed/entities/epsilon.xml +9 -0
  874. data/tests/illformed/entities/equiv.xml +9 -0
  875. data/tests/illformed/entities/eta.xml +9 -0
  876. data/tests/illformed/entities/eth.xml +9 -0
  877. data/tests/illformed/entities/euml.xml +9 -0
  878. data/tests/illformed/entities/euro.xml +9 -0
  879. data/tests/illformed/entities/exist.xml +9 -0
  880. data/tests/illformed/entities/fnof.xml +9 -0
  881. data/tests/illformed/entities/forall.xml +9 -0
  882. data/tests/illformed/entities/frac12.xml +9 -0
  883. data/tests/illformed/entities/frac14.xml +9 -0
  884. data/tests/illformed/entities/frac34.xml +9 -0
  885. data/tests/illformed/entities/frasl.xml +9 -0
  886. data/tests/illformed/entities/gamma.xml +9 -0
  887. data/tests/illformed/entities/ge.xml +9 -0
  888. data/tests/illformed/entities/hArr.xml +9 -0
  889. data/tests/illformed/entities/hearts.xml +9 -0
  890. data/tests/illformed/entities/hellip.xml +9 -0
  891. data/tests/illformed/entities/iacute.xml +9 -0
  892. data/tests/illformed/entities/icirc.xml +9 -0
  893. data/tests/illformed/entities/iexcl.xml +9 -0
  894. data/tests/illformed/entities/igrave.xml +9 -0
  895. data/tests/illformed/entities/image.xml +9 -0
  896. data/tests/illformed/entities/infin.xml +9 -0
  897. data/tests/illformed/entities/int.xml +9 -0
  898. data/tests/illformed/entities/iota.xml +9 -0
  899. data/tests/illformed/entities/iquest.xml +9 -0
  900. data/tests/illformed/entities/isin.xml +9 -0
  901. data/tests/illformed/entities/iuml.xml +9 -0
  902. data/tests/illformed/entities/kappa.xml +9 -0
  903. data/tests/illformed/entities/lArr.xml +9 -0
  904. data/tests/illformed/entities/lambda.xml +9 -0
  905. data/tests/illformed/entities/lang.xml +9 -0
  906. data/tests/illformed/entities/laquo.xml +9 -0
  907. data/tests/illformed/entities/lceil.xml +9 -0
  908. data/tests/illformed/entities/ldquo.xml +9 -0
  909. data/tests/illformed/entities/le.xml +9 -0
  910. data/tests/illformed/entities/lfloor.xml +9 -0
  911. data/tests/illformed/entities/lowast.xml +9 -0
  912. data/tests/illformed/entities/loz.xml +9 -0
  913. data/tests/illformed/entities/lrm.xml +9 -0
  914. data/tests/illformed/entities/lsaquo.xml +9 -0
  915. data/tests/illformed/entities/lsquo.xml +9 -0
  916. data/tests/illformed/entities/macr.xml +9 -0
  917. data/tests/illformed/entities/mdash.xml +9 -0
  918. data/tests/illformed/entities/micro.xml +9 -0
  919. data/tests/illformed/entities/middot.xml +9 -0
  920. data/tests/illformed/entities/minus.xml +9 -0
  921. data/tests/illformed/entities/mu.xml +9 -0
  922. data/tests/illformed/entities/nabla.xml +9 -0
  923. data/tests/illformed/entities/nbsp.xml +9 -0
  924. data/tests/illformed/entities/ndash.xml +9 -0
  925. data/tests/illformed/entities/ne.xml +9 -0
  926. data/tests/illformed/entities/ni.xml +9 -0
  927. data/tests/illformed/entities/not.xml +9 -0
  928. data/tests/illformed/entities/notin.xml +9 -0
  929. data/tests/illformed/entities/nsub.xml +9 -0
  930. data/tests/illformed/entities/ntilde.xml +9 -0
  931. data/tests/illformed/entities/nu.xml +9 -0
  932. data/tests/illformed/entities/oacute.xml +9 -0
  933. data/tests/illformed/entities/ocirc.xml +9 -0
  934. data/tests/illformed/entities/oelig.xml +9 -0
  935. data/tests/illformed/entities/ograve.xml +9 -0
  936. data/tests/illformed/entities/oline.xml +9 -0
  937. data/tests/illformed/entities/omega.xml +9 -0
  938. data/tests/illformed/entities/omicron.xml +9 -0
  939. data/tests/illformed/entities/oplus.xml +9 -0
  940. data/tests/illformed/entities/or.xml +9 -0
  941. data/tests/illformed/entities/ordf.xml +9 -0
  942. data/tests/illformed/entities/ordm.xml +9 -0
  943. data/tests/illformed/entities/oslash.xml +9 -0
  944. data/tests/illformed/entities/otilde.xml +9 -0
  945. data/tests/illformed/entities/otimes.xml +9 -0
  946. data/tests/illformed/entities/ouml.xml +9 -0
  947. data/tests/illformed/entities/para.xml +9 -0
  948. data/tests/illformed/entities/part.xml +9 -0
  949. data/tests/illformed/entities/permil.xml +9 -0
  950. data/tests/illformed/entities/perp.xml +9 -0
  951. data/tests/illformed/entities/phi.xml +9 -0
  952. data/tests/illformed/entities/pi.xml +9 -0
  953. data/tests/illformed/entities/piv.xml +9 -0
  954. data/tests/illformed/entities/plusmn.xml +9 -0
  955. data/tests/illformed/entities/pound.xml +9 -0
  956. data/tests/illformed/entities/prime.xml +9 -0
  957. data/tests/illformed/entities/prod.xml +9 -0
  958. data/tests/illformed/entities/prop.xml +9 -0
  959. data/tests/illformed/entities/psi.xml +9 -0
  960. data/tests/illformed/entities/radic.xml +9 -0
  961. data/tests/illformed/entities/rang.xml +9 -0
  962. data/tests/illformed/entities/raquo.xml +9 -0
  963. data/tests/illformed/entities/rarr.xml +9 -0
  964. data/tests/illformed/entities/rceil.xml +9 -0
  965. data/tests/illformed/entities/rdquo.xml +9 -0
  966. data/tests/illformed/entities/real.xml +9 -0
  967. data/tests/illformed/entities/reg.xml +9 -0
  968. data/tests/illformed/entities/rfloor.xml +9 -0
  969. data/tests/illformed/entities/rho.xml +9 -0
  970. data/tests/illformed/entities/rlm.xml +9 -0
  971. data/tests/illformed/entities/rsaquo.xml +9 -0
  972. data/tests/illformed/entities/rsquo.xml +9 -0
  973. data/tests/illformed/entities/sbquo.xml +9 -0
  974. data/tests/illformed/entities/scaron.xml +9 -0
  975. data/tests/illformed/entities/sdot.xml +9 -0
  976. data/tests/illformed/entities/sect.xml +9 -0
  977. data/tests/illformed/entities/shy.xml +9 -0
  978. data/tests/illformed/entities/sigma.xml +9 -0
  979. data/tests/illformed/entities/sigmaf.xml +9 -0
  980. data/tests/illformed/entities/sim.xml +9 -0
  981. data/tests/illformed/entities/spades.xml +9 -0
  982. data/tests/illformed/entities/sub.xml +9 -0
  983. data/tests/illformed/entities/sube.xml +9 -0
  984. data/tests/illformed/entities/sum.xml +9 -0
  985. data/tests/illformed/entities/sup.xml +9 -0
  986. data/tests/illformed/entities/sup1.xml +9 -0
  987. data/tests/illformed/entities/sup2.xml +9 -0
  988. data/tests/illformed/entities/sup3.xml +9 -0
  989. data/tests/illformed/entities/supe.xml +9 -0
  990. data/tests/illformed/entities/szlig.xml +9 -0
  991. data/tests/illformed/entities/tau.xml +9 -0
  992. data/tests/illformed/entities/there4.xml +9 -0
  993. data/tests/illformed/entities/theta.xml +9 -0
  994. data/tests/illformed/entities/thetasym.xml +9 -0
  995. data/tests/illformed/entities/thinsp.xml +9 -0
  996. data/tests/illformed/entities/thorn.xml +9 -0
  997. data/tests/illformed/entities/tilde.xml +9 -0
  998. data/tests/illformed/entities/times.xml +9 -0
  999. data/tests/illformed/entities/trade.xml +9 -0
  1000. data/tests/illformed/entities/uacute.xml +9 -0
  1001. data/tests/illformed/entities/uarr.xml +9 -0
  1002. data/tests/illformed/entities/ucirc.xml +9 -0
  1003. data/tests/illformed/entities/ugrave.xml +9 -0
  1004. data/tests/illformed/entities/uml.xml +9 -0
  1005. data/tests/illformed/entities/upper_AElig.xml +9 -0
  1006. data/tests/illformed/entities/upper_Aacute.xml +9 -0
  1007. data/tests/illformed/entities/upper_Acirc.xml +9 -0
  1008. data/tests/illformed/entities/upper_Agrave.xml +9 -0
  1009. data/tests/illformed/entities/upper_Alpha.xml +9 -0
  1010. data/tests/illformed/entities/upper_Aring.xml +9 -0
  1011. data/tests/illformed/entities/upper_Atilde.xml +9 -0
  1012. data/tests/illformed/entities/upper_Auml.xml +9 -0
  1013. data/tests/illformed/entities/upper_Beta.xml +9 -0
  1014. data/tests/illformed/entities/upper_Ccedil.xml +9 -0
  1015. data/tests/illformed/entities/upper_Chi.xml +9 -0
  1016. data/tests/illformed/entities/upper_Dagger.xml +9 -0
  1017. data/tests/illformed/entities/upper_Delta.xml +9 -0
  1018. data/tests/illformed/entities/upper_ETH.xml +9 -0
  1019. data/tests/illformed/entities/upper_Eacute.xml +9 -0
  1020. data/tests/illformed/entities/upper_Ecirc.xml +9 -0
  1021. data/tests/illformed/entities/upper_Egrave.xml +9 -0
  1022. data/tests/illformed/entities/upper_Epsilon.xml +9 -0
  1023. data/tests/illformed/entities/upper_Eta.xml +9 -0
  1024. data/tests/illformed/entities/upper_Euml.xml +9 -0
  1025. data/tests/illformed/entities/upper_Gamma.xml +9 -0
  1026. data/tests/illformed/entities/upper_Iacute.xml +9 -0
  1027. data/tests/illformed/entities/upper_Icirc.xml +9 -0
  1028. data/tests/illformed/entities/upper_Igrave.xml +9 -0
  1029. data/tests/illformed/entities/upper_Iota.xml +9 -0
  1030. data/tests/illformed/entities/upper_Iuml.xml +9 -0
  1031. data/tests/illformed/entities/upper_Kappa.xml +9 -0
  1032. data/tests/illformed/entities/upper_Lambda.xml +9 -0
  1033. data/tests/illformed/entities/upper_Mu.xml +9 -0
  1034. data/tests/illformed/entities/upper_Ntilde.xml +9 -0
  1035. data/tests/illformed/entities/upper_Nu.xml +9 -0
  1036. data/tests/illformed/entities/upper_OElig.xml +9 -0
  1037. data/tests/illformed/entities/upper_Oacute.xml +9 -0
  1038. data/tests/illformed/entities/upper_Ocirc.xml +9 -0
  1039. data/tests/illformed/entities/upper_Ograve.xml +9 -0
  1040. data/tests/illformed/entities/upper_Omega.xml +9 -0
  1041. data/tests/illformed/entities/upper_Omicron.xml +9 -0
  1042. data/tests/illformed/entities/upper_Oslash.xml +9 -0
  1043. data/tests/illformed/entities/upper_Otilde.xml +9 -0
  1044. data/tests/illformed/entities/upper_Ouml.xml +9 -0
  1045. data/tests/illformed/entities/upper_Phi.xml +9 -0
  1046. data/tests/illformed/entities/upper_Pi.xml +9 -0
  1047. data/tests/illformed/entities/upper_Prime.xml +9 -0
  1048. data/tests/illformed/entities/upper_Psi.xml +9 -0
  1049. data/tests/illformed/entities/upper_Rho.xml +9 -0
  1050. data/tests/illformed/entities/upper_Scaron.xml +9 -0
  1051. data/tests/illformed/entities/upper_Sigma.xml +9 -0
  1052. data/tests/illformed/entities/upper_THORN.xml +9 -0
  1053. data/tests/illformed/entities/upper_Tau.xml +9 -0
  1054. data/tests/illformed/entities/upper_Theta.xml +9 -0
  1055. data/tests/illformed/entities/upper_Uacute.xml +9 -0
  1056. data/tests/illformed/entities/upper_Ucirc.xml +9 -0
  1057. data/tests/illformed/entities/upper_Ugrave.xml +9 -0
  1058. data/tests/illformed/entities/upper_Upsilon.xml +9 -0
  1059. data/tests/illformed/entities/upper_Uuml.xml +9 -0
  1060. data/tests/illformed/entities/upper_Xi.xml +9 -0
  1061. data/tests/illformed/entities/upper_Yacute.xml +9 -0
  1062. data/tests/illformed/entities/upper_Yuml.xml +9 -0
  1063. data/tests/illformed/entities/upper_Zeta.xml +9 -0
  1064. data/tests/illformed/entities/upsih.xml +9 -0
  1065. data/tests/illformed/entities/upsilon.xml +9 -0
  1066. data/tests/illformed/entities/uuml.xml +9 -0
  1067. data/tests/illformed/entities/weierp.xml +9 -0
  1068. data/tests/illformed/entities/xi.xml +9 -0
  1069. data/tests/illformed/entities/yacute.xml +9 -0
  1070. data/tests/illformed/entities/yen.xml +9 -0
  1071. data/tests/illformed/entities/yuml.xml +9 -0
  1072. data/tests/illformed/entities/zeta.xml +9 -0
  1073. data/tests/illformed/entities/zwj.xml +9 -0
  1074. data/tests/illformed/entities/zwnj.xml +9 -0
  1075. data/tests/illformed/itunes/itunes_channel_block.xml +9 -0
  1076. data/tests/illformed/itunes/itunes_channel_block_false.xml +9 -0
  1077. data/tests/illformed/itunes/itunes_channel_block_no.xml +9 -0
  1078. data/tests/illformed/itunes/itunes_channel_block_true.xml +9 -0
  1079. data/tests/illformed/itunes/itunes_channel_block_uppercase.xml +9 -0
  1080. data/tests/illformed/itunes/itunes_channel_block_whitespace.xml +9 -0
  1081. data/tests/illformed/itunes/itunes_channel_category.xml +9 -0
  1082. data/tests/illformed/itunes/itunes_channel_category_nested.xml +11 -0
  1083. data/tests/illformed/itunes/itunes_channel_category_scheme.xml +9 -0
  1084. data/tests/illformed/itunes/itunes_channel_explicit.xml +9 -0
  1085. data/tests/illformed/itunes/itunes_channel_explicit_false.xml +9 -0
  1086. data/tests/illformed/itunes/itunes_channel_explicit_no.xml +9 -0
  1087. data/tests/illformed/itunes/itunes_channel_explicit_true.xml +9 -0
  1088. data/tests/illformed/itunes/itunes_channel_explicit_uppercase.xml +9 -0
  1089. data/tests/illformed/itunes/itunes_channel_explicit_whitespace.xml +9 -0
  1090. data/tests/illformed/itunes/itunes_channel_image.xml +9 -0
  1091. data/tests/illformed/itunes/itunes_channel_keywords.xml +9 -0
  1092. data/tests/illformed/itunes/itunes_channel_keywords_duplicate.xml +9 -0
  1093. data/tests/illformed/itunes/itunes_channel_keywords_duplicate_2.xml +10 -0
  1094. data/tests/illformed/itunes/itunes_channel_keywords_multiple.xml +9 -0
  1095. data/tests/illformed/itunes/itunes_channel_link_image.xml +9 -0
  1096. data/tests/illformed/itunes/itunes_channel_owner_email.xml +12 -0
  1097. data/tests/illformed/itunes/itunes_channel_owner_name.xml +12 -0
  1098. data/tests/illformed/itunes/itunes_channel_subtitle.xml +9 -0
  1099. data/tests/illformed/itunes/itunes_channel_summary.xml +9 -0
  1100. data/tests/illformed/itunes/itunes_core_element_uppercase.xml +9 -0
  1101. data/tests/illformed/itunes/itunes_enclosure_url_maps_id.xml +11 -0
  1102. data/tests/illformed/itunes/itunes_enclosure_url_maps_id_2.xml +12 -0
  1103. data/tests/illformed/itunes/itunes_item_author_map_author.xml +11 -0
  1104. data/tests/illformed/itunes/itunes_item_block.xml +11 -0
  1105. data/tests/illformed/itunes/itunes_item_block_false.xml +11 -0
  1106. data/tests/illformed/itunes/itunes_item_block_no.xml +11 -0
  1107. data/tests/illformed/itunes/itunes_item_block_true.xml +11 -0
  1108. data/tests/illformed/itunes/itunes_item_block_uppercase.xml +11 -0
  1109. data/tests/illformed/itunes/itunes_item_block_whitespace.xml +11 -0
  1110. data/tests/illformed/itunes/itunes_item_category.xml +11 -0
  1111. data/tests/illformed/itunes/itunes_item_category_nested.xml +13 -0
  1112. data/tests/illformed/itunes/itunes_item_category_scheme.xml +11 -0
  1113. data/tests/illformed/itunes/itunes_item_duration.xml +11 -0
  1114. data/tests/illformed/itunes/itunes_item_explicit.xml +11 -0
  1115. data/tests/illformed/itunes/itunes_item_explicit_false.xml +11 -0
  1116. data/tests/illformed/itunes/itunes_item_explicit_no.xml +11 -0
  1117. data/tests/illformed/itunes/itunes_item_explicit_true.xml +11 -0
  1118. data/tests/illformed/itunes/itunes_item_explicit_uppercase.xml +11 -0
  1119. data/tests/illformed/itunes/itunes_item_explicit_whitespace.xml +11 -0
  1120. data/tests/illformed/itunes/itunes_item_image.xml +11 -0
  1121. data/tests/illformed/itunes/itunes_item_link_image.xml +11 -0
  1122. data/tests/illformed/itunes/itunes_item_subtitle.xml +11 -0
  1123. data/tests/illformed/itunes/itunes_item_summary.xml +11 -0
  1124. data/tests/illformed/itunes/itunes_namespace.xml +9 -0
  1125. data/tests/illformed/itunes/itunes_namespace_example.xml +9 -0
  1126. data/tests/illformed/itunes/itunes_namespace_lowercase.xml +9 -0
  1127. data/tests/illformed/itunes/itunes_namespace_uppercase.xml +9 -0
  1128. data/tests/illformed/lang/channel_dc_language.xml +9 -0
  1129. data/tests/illformed/lang/channel_language.xml +9 -0
  1130. data/tests/illformed/lang/entry_content_xml_lang.xml +9 -0
  1131. data/tests/illformed/lang/entry_content_xml_lang_blank.xml +9 -0
  1132. data/tests/illformed/lang/entry_content_xml_lang_blank_2.xml +9 -0
  1133. data/tests/illformed/lang/entry_content_xml_lang_blank_3.xml +12 -0
  1134. data/tests/illformed/lang/entry_content_xml_lang_inherit.xml +9 -0
  1135. data/tests/illformed/lang/entry_content_xml_lang_inherit_2.xml +9 -0
  1136. data/tests/illformed/lang/entry_content_xml_lang_inherit_3.xml +10 -0
  1137. data/tests/illformed/lang/entry_content_xml_lang_inherit_4.xml +10 -0
  1138. data/tests/illformed/lang/entry_summary_xml_lang.xml +9 -0
  1139. data/tests/illformed/lang/entry_summary_xml_lang_blank.xml +9 -0
  1140. data/tests/illformed/lang/entry_summary_xml_lang_inherit.xml +9 -0
  1141. data/tests/illformed/lang/entry_summary_xml_lang_inherit_2.xml +9 -0
  1142. data/tests/illformed/lang/entry_summary_xml_lang_inherit_3.xml +10 -0
  1143. data/tests/illformed/lang/entry_summary_xml_lang_inherit_4.xml +10 -0
  1144. data/tests/illformed/lang/entry_title_xml_lang.xml +9 -0
  1145. data/tests/illformed/lang/entry_title_xml_lang_blank.xml +9 -0
  1146. data/tests/illformed/lang/entry_title_xml_lang_inherit.xml +9 -0
  1147. data/tests/illformed/lang/entry_title_xml_lang_inherit_2.xml +9 -0
  1148. data/tests/illformed/lang/entry_title_xml_lang_inherit_3.xml +10 -0
  1149. data/tests/illformed/lang/entry_title_xml_lang_inherit_4.xml +10 -0
  1150. data/tests/illformed/lang/feed_copyright_xml_lang.xml +7 -0
  1151. data/tests/illformed/lang/feed_copyright_xml_lang_blank.xml +7 -0
  1152. data/tests/illformed/lang/feed_copyright_xml_lang_inherit.xml +7 -0
  1153. data/tests/illformed/lang/feed_copyright_xml_lang_inherit_2.xml +7 -0
  1154. data/tests/illformed/lang/feed_copyright_xml_lang_inherit_3.xml +8 -0
  1155. data/tests/illformed/lang/feed_copyright_xml_lang_inherit_4.xml +8 -0
  1156. data/tests/illformed/lang/feed_info_xml_lang.xml +7 -0
  1157. data/tests/illformed/lang/feed_info_xml_lang_blank.xml +7 -0
  1158. data/tests/illformed/lang/feed_info_xml_lang_inherit.xml +7 -0
  1159. data/tests/illformed/lang/feed_info_xml_lang_inherit_2.xml +7 -0
  1160. data/tests/illformed/lang/feed_info_xml_lang_inherit_3.xml +8 -0
  1161. data/tests/illformed/lang/feed_info_xml_lang_inherit_4.xml +8 -0
  1162. data/tests/illformed/lang/feed_language.xml +9 -0
  1163. data/tests/illformed/lang/feed_language_override.xml +9 -0
  1164. data/tests/illformed/lang/feed_not_xml_lang.xml +7 -0
  1165. data/tests/illformed/lang/feed_not_xml_lang_2.xml +7 -0
  1166. data/tests/illformed/lang/feed_tagline_xml_lang.xml +7 -0
  1167. data/tests/illformed/lang/feed_tagline_xml_lang_blank.xml +7 -0
  1168. data/tests/illformed/lang/feed_tagline_xml_lang_inherit.xml +7 -0
  1169. data/tests/illformed/lang/feed_tagline_xml_lang_inherit_2.xml +7 -0
  1170. data/tests/illformed/lang/feed_tagline_xml_lang_inherit_3.xml +8 -0
  1171. data/tests/illformed/lang/feed_tagline_xml_lang_inherit_4.xml +8 -0
  1172. data/tests/illformed/lang/feed_title_xml_lang.xml +7 -0
  1173. data/tests/illformed/lang/feed_title_xml_lang_blank.xml +7 -0
  1174. data/tests/illformed/lang/feed_title_xml_lang_inherit.xml +7 -0
  1175. data/tests/illformed/lang/feed_title_xml_lang_inherit_2.xml +7 -0
  1176. data/tests/illformed/lang/feed_title_xml_lang_inherit_3.xml +8 -0
  1177. data/tests/illformed/lang/feed_title_xml_lang_inherit_4.xml +8 -0
  1178. data/tests/illformed/lang/feed_xml_lang.xml +6 -0
  1179. data/tests/illformed/lang/http_content_language.xml +7 -0
  1180. data/tests/illformed/lang/http_content_language_entry_title_inherit.xml +10 -0
  1181. data/tests/illformed/lang/http_content_language_entry_title_inherit_2.xml +11 -0
  1182. data/tests/illformed/lang/http_content_language_feed_language.xml +10 -0
  1183. data/tests/illformed/lang/http_content_language_feed_xml_lang.xml +7 -0
  1184. data/tests/illformed/lang/item_content_encoded_xml_lang.xml +11 -0
  1185. data/tests/illformed/lang/item_content_encoded_xml_lang_inherit.xml +11 -0
  1186. data/tests/illformed/lang/item_dc_language.xml +11 -0
  1187. data/tests/illformed/lang/item_fullitem_xml_lang.xml +11 -0
  1188. data/tests/illformed/lang/item_fullitem_xml_lang_inherit.xml +11 -0
  1189. data/tests/illformed/lang/item_xhtml_body_xml_lang.xml +13 -0
  1190. data/tests/illformed/lang/item_xhtml_body_xml_lang_inherit.xml +13 -0
  1191. data/tests/illformed/namespace/rss1.0withModules.xml +47 -0
  1192. data/tests/illformed/namespace/rss1.0withModulesNoDefNS.xml +48 -0
  1193. data/tests/illformed/namespace/rss1.0withModulesNoDefNSLocalNameClash.xml +53 -0
  1194. data/tests/illformed/namespace/rss2.0NSwithModules.xml +50 -0
  1195. data/tests/illformed/namespace/rss2.0NSwithModulesNoDefNS.xml +50 -0
  1196. data/tests/illformed/namespace/rss2.0NSwithModulesNoDefNSLocalNameClash.xml +58 -0
  1197. data/tests/illformed/namespace/rss2.0noNSwithModules.xml +49 -0
  1198. data/tests/illformed/namespace/rss2.0noNSwithModulesLocalNameClash.xml +57 -0
  1199. data/tests/illformed/namespace/undeclared_namespace.xml +10 -0
  1200. data/tests/illformed/rdf/rdf_channel_description.xml +9 -0
  1201. data/tests/illformed/rdf/rdf_channel_empty_textinput.xml +26 -0
  1202. data/tests/illformed/rdf/rdf_channel_link.xml +9 -0
  1203. data/tests/illformed/rdf/rdf_channel_title.xml +9 -0
  1204. data/tests/illformed/rdf/rdf_item_description.xml +16 -0
  1205. data/tests/illformed/rdf/rdf_item_link.xml +16 -0
  1206. data/tests/illformed/rdf/rdf_item_rdf_about.xml +15 -0
  1207. data/tests/illformed/rdf/rdf_item_title.xml +16 -0
  1208. data/tests/illformed/rdf/rss090_channel_title.xml +12 -0
  1209. data/tests/illformed/rdf/rss090_item_title.xml +12 -0
  1210. data/tests/illformed/rdf/rss_version_10.xml +6 -0
  1211. data/tests/illformed/rdf/rss_version_10_not_default_ns.xml +8 -0
  1212. data/tests/illformed/rss/aaa_illformed.xml +6 -0
  1213. data/tests/illformed/rss/channel_author.xml +9 -0
  1214. data/tests/illformed/rss/channel_author_map_author_detail_email.xml +9 -0
  1215. data/tests/illformed/rss/channel_author_map_author_detail_email_2.xml +9 -0
  1216. data/tests/illformed/rss/channel_author_map_author_detail_email_3.xml +9 -0
  1217. data/tests/illformed/rss/channel_author_map_author_detail_name.xml +9 -0
  1218. data/tests/illformed/rss/channel_author_map_author_detail_name_2.xml +9 -0
  1219. data/tests/illformed/rss/channel_category.xml +9 -0
  1220. data/tests/illformed/rss/channel_category_domain.xml +9 -0
  1221. data/tests/illformed/rss/channel_category_multiple.xml +10 -0
  1222. data/tests/illformed/rss/channel_category_multiple_2.xml +10 -0
  1223. data/tests/illformed/rss/channel_cloud_domain.xml +9 -0
  1224. data/tests/illformed/rss/channel_cloud_path.xml +9 -0
  1225. data/tests/illformed/rss/channel_cloud_port.xml +9 -0
  1226. data/tests/illformed/rss/channel_cloud_protocol.xml +9 -0
  1227. data/tests/illformed/rss/channel_cloud_registerProcedure.xml +9 -0
  1228. data/tests/illformed/rss/channel_copyright.xml +9 -0
  1229. data/tests/illformed/rss/channel_dc_author.xml +9 -0
  1230. data/tests/illformed/rss/channel_dc_author_map_author_detail_email.xml +9 -0
  1231. data/tests/illformed/rss/channel_dc_author_map_author_detail_name.xml +9 -0
  1232. data/tests/illformed/rss/channel_dc_contributor.xml +9 -0
  1233. data/tests/illformed/rss/channel_dc_creator.xml +9 -0
  1234. data/tests/illformed/rss/channel_dc_creator_map_author_detail_email.xml +9 -0
  1235. data/tests/illformed/rss/channel_dc_creator_map_author_detail_name.xml +9 -0
  1236. data/tests/illformed/rss/channel_dc_publisher.xml +9 -0
  1237. data/tests/illformed/rss/channel_dc_publisher_email.xml +9 -0
  1238. data/tests/illformed/rss/channel_dc_publisher_name.xml +9 -0
  1239. data/tests/illformed/rss/channel_dc_rights.xml +9 -0
  1240. data/tests/illformed/rss/channel_dc_subject.xml +9 -0
  1241. data/tests/illformed/rss/channel_dc_subject_2.xml +9 -0
  1242. data/tests/illformed/rss/channel_dc_subject_multiple.xml +10 -0
  1243. data/tests/illformed/rss/channel_dc_title.xml +9 -0
  1244. data/tests/illformed/rss/channel_description.xml +9 -0
  1245. data/tests/illformed/rss/channel_description_escaped_markup.xml +9 -0
  1246. data/tests/illformed/rss/channel_description_map_tagline.xml +9 -0
  1247. data/tests/illformed/rss/channel_description_naked_markup.xml +9 -0
  1248. data/tests/illformed/rss/channel_description_shorttag.xml +10 -0
  1249. data/tests/illformed/rss/channel_docs.xml +9 -0
  1250. data/tests/illformed/rss/channel_generator.xml +9 -0
  1251. data/tests/illformed/rss/channel_image_description.xml +16 -0
  1252. data/tests/illformed/rss/channel_image_height.xml +16 -0
  1253. data/tests/illformed/rss/channel_image_link.xml +16 -0
  1254. data/tests/illformed/rss/channel_image_link_conflict.xml +12 -0
  1255. data/tests/illformed/rss/channel_image_title.xml +16 -0
  1256. data/tests/illformed/rss/channel_image_title_conflict.xml +12 -0
  1257. data/tests/illformed/rss/channel_image_url.xml +16 -0
  1258. data/tests/illformed/rss/channel_image_width.xml +16 -0
  1259. data/tests/illformed/rss/channel_link.xml +9 -0
  1260. data/tests/illformed/rss/channel_managingEditor.xml +9 -0
  1261. data/tests/illformed/rss/channel_managingEditor_map_author_detail_email.xml +9 -0
  1262. data/tests/illformed/rss/channel_managingEditor_map_author_detail_name.xml +9 -0
  1263. data/tests/illformed/rss/channel_textInput_description.xml +14 -0
  1264. data/tests/illformed/rss/channel_textInput_description_conflict.xml +12 -0
  1265. data/tests/illformed/rss/channel_textInput_link.xml +12 -0
  1266. data/tests/illformed/rss/channel_textInput_link_conflict.xml +12 -0
  1267. data/tests/illformed/rss/channel_textInput_name.xml +11 -0
  1268. data/tests/illformed/rss/channel_textInput_title.xml +12 -0
  1269. data/tests/illformed/rss/channel_textInput_title_conflict.xml +12 -0
  1270. data/tests/illformed/rss/channel_title.xml +9 -0
  1271. data/tests/illformed/rss/channel_title_apos.xml +9 -0
  1272. data/tests/illformed/rss/channel_title_gt.xml +9 -0
  1273. data/tests/illformed/rss/channel_title_lt.xml +9 -0
  1274. data/tests/illformed/rss/channel_ttl.xml +9 -0
  1275. data/tests/illformed/rss/channel_webMaster.xml +9 -0
  1276. data/tests/illformed/rss/channel_webMaster_email.xml +9 -0
  1277. data/tests/illformed/rss/channel_webMaster_name.xml +9 -0
  1278. data/tests/illformed/rss/item_author.xml +11 -0
  1279. data/tests/illformed/rss/item_author_map_author_detail_email.xml +11 -0
  1280. data/tests/illformed/rss/item_author_map_author_detail_name.xml +11 -0
  1281. data/tests/illformed/rss/item_category.xml +11 -0
  1282. data/tests/illformed/rss/item_category_domain.xml +11 -0
  1283. data/tests/illformed/rss/item_category_multiple.xml +12 -0
  1284. data/tests/illformed/rss/item_category_multiple_2.xml +12 -0
  1285. data/tests/illformed/rss/item_comments.xml +11 -0
  1286. data/tests/illformed/rss/item_content_encoded.xml +11 -0
  1287. data/tests/illformed/rss/item_content_encoded_mode.xml +11 -0
  1288. data/tests/illformed/rss/item_content_encoded_type.xml +11 -0
  1289. data/tests/illformed/rss/item_dc_author.xml +11 -0
  1290. data/tests/illformed/rss/item_dc_author_map_author_detail_email.xml +11 -0
  1291. data/tests/illformed/rss/item_dc_author_map_author_detail_name.xml +11 -0
  1292. data/tests/illformed/rss/item_dc_contributor.xml +11 -0
  1293. data/tests/illformed/rss/item_dc_creator.xml +11 -0
  1294. data/tests/illformed/rss/item_dc_creator_map_author_detail_email.xml +11 -0
  1295. data/tests/illformed/rss/item_dc_creator_map_author_detail_name.xml +11 -0
  1296. data/tests/illformed/rss/item_dc_publisher.xml +11 -0
  1297. data/tests/illformed/rss/item_dc_publisher_email.xml +11 -0
  1298. data/tests/illformed/rss/item_dc_publisher_name.xml +11 -0
  1299. data/tests/illformed/rss/item_dc_rights.xml +11 -0
  1300. data/tests/illformed/rss/item_dc_subject.xml +11 -0
  1301. data/tests/illformed/rss/item_dc_subject_2.xml +11 -0
  1302. data/tests/illformed/rss/item_dc_subject_multiple.xml +12 -0
  1303. data/tests/illformed/rss/item_dc_title.xml +11 -0
  1304. data/tests/illformed/rss/item_description.xml +11 -0
  1305. data/tests/illformed/rss/item_description_and_summary.xml +12 -0
  1306. data/tests/illformed/rss/item_description_br.xml +11 -0
  1307. data/tests/illformed/rss/item_description_br_shorttag.xml +12 -0
  1308. data/tests/illformed/rss/item_description_escaped_markup.xml +11 -0
  1309. data/tests/illformed/rss/item_description_map_summary.xml +11 -0
  1310. data/tests/illformed/rss/item_description_naked_markup.xml +11 -0
  1311. data/tests/illformed/rss/item_description_not_a_doctype.xml +9 -0
  1312. data/tests/illformed/rss/item_enclosure_length.xml +12 -0
  1313. data/tests/illformed/rss/item_enclosure_multiple.xml +13 -0
  1314. data/tests/illformed/rss/item_enclosure_type.xml +12 -0
  1315. data/tests/illformed/rss/item_enclosure_url.xml +12 -0
  1316. data/tests/illformed/rss/item_fullitem.xml +11 -0
  1317. data/tests/illformed/rss/item_fullitem_mode.xml +11 -0
  1318. data/tests/illformed/rss/item_fullitem_type.xml +11 -0
  1319. data/tests/illformed/rss/item_guid.xml +11 -0
  1320. data/tests/illformed/rss/item_guid_conflict_link.xml +12 -0
  1321. data/tests/illformed/rss/item_guid_guidislink.xml +11 -0
  1322. data/tests/illformed/rss/item_guid_isPermaLink_conflict_link.xml +12 -0
  1323. data/tests/illformed/rss/item_guid_isPermaLink_conflict_link_not_guidislink.xml +12 -0
  1324. data/tests/illformed/rss/item_guid_isPermaLink_guidislink.xml +11 -0
  1325. data/tests/illformed/rss/item_guid_isPermaLink_map_link.xml +11 -0
  1326. data/tests/illformed/rss/item_guid_map_link.xml +11 -0
  1327. data/tests/illformed/rss/item_guid_not_permalink.xml +11 -0
  1328. data/tests/illformed/rss/item_guid_not_permalink_conflict_link.xml +12 -0
  1329. data/tests/illformed/rss/item_guid_not_permalink_not_guidislink.xml +11 -0
  1330. data/tests/illformed/rss/item_guid_not_permalink_not_guidislink_2.xml +12 -0
  1331. data/tests/illformed/rss/item_link.xml +11 -0
  1332. data/tests/illformed/rss/item_source.xml +12 -0
  1333. data/tests/illformed/rss/item_source_url.xml +12 -0
  1334. data/tests/illformed/rss/item_summary_and_description.xml +12 -0
  1335. data/tests/illformed/rss/item_title.xml +11 -0
  1336. data/tests/illformed/rss/item_xhtml_body.xml +13 -0
  1337. data/tests/illformed/rss/item_xhtml_body_mode.xml +13 -0
  1338. data/tests/illformed/rss/item_xhtml_body_type.xml +13 -0
  1339. data/tests/illformed/rss/rss_namespace_1.xml +9 -0
  1340. data/tests/illformed/rss/rss_namespace_2.xml +9 -0
  1341. data/tests/illformed/rss/rss_namespace_3.xml +9 -0
  1342. data/tests/illformed/rss/rss_namespace_4.xml +9 -0
  1343. data/tests/illformed/rss/rss_version_090.xml +6 -0
  1344. data/tests/illformed/rss/rss_version_091_netscape.xml +7 -0
  1345. data/tests/illformed/rss/rss_version_092.xml +6 -0
  1346. data/tests/illformed/rss/rss_version_093.xml +6 -0
  1347. data/tests/illformed/rss/rss_version_094.xml +6 -0
  1348. data/tests/illformed/rss/rss_version_20.xml +6 -0
  1349. data/tests/illformed/rss/rss_version_201.xml +6 -0
  1350. data/tests/illformed/rss/rss_version_21.xml +6 -0
  1351. data/tests/illformed/rss/rss_version_missing.xml +9 -0
  1352. data/tests/illformed/sanitize/entry_content_applet.xml +9 -0
  1353. data/tests/illformed/sanitize/entry_content_blink.xml +9 -0
  1354. data/tests/illformed/sanitize/entry_content_crazy.xml +75 -0
  1355. data/tests/illformed/sanitize/entry_content_embed.xml +9 -0
  1356. data/tests/illformed/sanitize/entry_content_frame.xml +9 -0
  1357. data/tests/illformed/sanitize/entry_content_iframe.xml +9 -0
  1358. data/tests/illformed/sanitize/entry_content_link.xml +9 -0
  1359. data/tests/illformed/sanitize/entry_content_meta.xml +9 -0
  1360. data/tests/illformed/sanitize/entry_content_object.xml +9 -0
  1361. data/tests/illformed/sanitize/entry_content_onabort.xml +9 -0
  1362. data/tests/illformed/sanitize/entry_content_onblur.xml +9 -0
  1363. data/tests/illformed/sanitize/entry_content_onchange.xml +9 -0
  1364. data/tests/illformed/sanitize/entry_content_onclick.xml +9 -0
  1365. data/tests/illformed/sanitize/entry_content_ondblclick.xml +9 -0
  1366. data/tests/illformed/sanitize/entry_content_onerror.xml +9 -0
  1367. data/tests/illformed/sanitize/entry_content_onfocus.xml +9 -0
  1368. data/tests/illformed/sanitize/entry_content_onkeydown.xml +9 -0
  1369. data/tests/illformed/sanitize/entry_content_onkeypress.xml +9 -0
  1370. data/tests/illformed/sanitize/entry_content_onkeyup.xml +9 -0
  1371. data/tests/illformed/sanitize/entry_content_onload.xml +9 -0
  1372. data/tests/illformed/sanitize/entry_content_onmousedown.xml +9 -0
  1373. data/tests/illformed/sanitize/entry_content_onmouseout.xml +9 -0
  1374. data/tests/illformed/sanitize/entry_content_onmouseover.xml +9 -0
  1375. data/tests/illformed/sanitize/entry_content_onmouseup.xml +9 -0
  1376. data/tests/illformed/sanitize/entry_content_onreset.xml +9 -0
  1377. data/tests/illformed/sanitize/entry_content_onresize.xml +9 -0
  1378. data/tests/illformed/sanitize/entry_content_onsubmit.xml +9 -0
  1379. data/tests/illformed/sanitize/entry_content_onunload.xml +9 -0
  1380. data/tests/illformed/sanitize/entry_content_script.xml +9 -0
  1381. data/tests/illformed/sanitize/entry_content_script_base64.xml +12 -0
  1382. data/tests/illformed/sanitize/entry_content_script_cdata.xml +9 -0
  1383. data/tests/illformed/sanitize/entry_content_script_inline.xml +9 -0
  1384. data/tests/illformed/sanitize/entry_content_style.xml +9 -0
  1385. data/tests/illformed/sanitize/entry_summary_applet.xml +9 -0
  1386. data/tests/illformed/sanitize/entry_summary_blink.xml +9 -0
  1387. data/tests/illformed/sanitize/entry_summary_crazy.xml +75 -0
  1388. data/tests/illformed/sanitize/entry_summary_embed.xml +9 -0
  1389. data/tests/illformed/sanitize/entry_summary_frame.xml +9 -0
  1390. data/tests/illformed/sanitize/entry_summary_iframe.xml +9 -0
  1391. data/tests/illformed/sanitize/entry_summary_link.xml +9 -0
  1392. data/tests/illformed/sanitize/entry_summary_meta.xml +9 -0
  1393. data/tests/illformed/sanitize/entry_summary_object.xml +9 -0
  1394. data/tests/illformed/sanitize/entry_summary_onabort.xml +9 -0
  1395. data/tests/illformed/sanitize/entry_summary_onblur.xml +9 -0
  1396. data/tests/illformed/sanitize/entry_summary_onchange.xml +9 -0
  1397. data/tests/illformed/sanitize/entry_summary_onclick.xml +9 -0
  1398. data/tests/illformed/sanitize/entry_summary_ondblclick.xml +9 -0
  1399. data/tests/illformed/sanitize/entry_summary_onerror.xml +9 -0
  1400. data/tests/illformed/sanitize/entry_summary_onfocus.xml +9 -0
  1401. data/tests/illformed/sanitize/entry_summary_onkeydown.xml +9 -0
  1402. data/tests/illformed/sanitize/entry_summary_onkeypress.xml +9 -0
  1403. data/tests/illformed/sanitize/entry_summary_onkeyup.xml +9 -0
  1404. data/tests/illformed/sanitize/entry_summary_onload.xml +9 -0
  1405. data/tests/illformed/sanitize/entry_summary_onmousedown.xml +9 -0
  1406. data/tests/illformed/sanitize/entry_summary_onmouseout.xml +9 -0
  1407. data/tests/illformed/sanitize/entry_summary_onmouseover.xml +9 -0
  1408. data/tests/illformed/sanitize/entry_summary_onmouseup.xml +9 -0
  1409. data/tests/illformed/sanitize/entry_summary_onreset.xml +9 -0
  1410. data/tests/illformed/sanitize/entry_summary_onresize.xml +9 -0
  1411. data/tests/illformed/sanitize/entry_summary_onsubmit.xml +9 -0
  1412. data/tests/illformed/sanitize/entry_summary_onunload.xml +9 -0
  1413. data/tests/illformed/sanitize/entry_summary_script.xml +9 -0
  1414. data/tests/illformed/sanitize/entry_summary_script_base64.xml +12 -0
  1415. data/tests/illformed/sanitize/entry_summary_script_cdata.xml +9 -0
  1416. data/tests/illformed/sanitize/entry_summary_script_inline.xml +9 -0
  1417. data/tests/illformed/sanitize/entry_summary_script_map_description.xml +9 -0
  1418. data/tests/illformed/sanitize/entry_summary_style.xml +9 -0
  1419. data/tests/illformed/sanitize/entry_title_applet.xml +9 -0
  1420. data/tests/illformed/sanitize/entry_title_blink.xml +9 -0
  1421. data/tests/illformed/sanitize/entry_title_crazy.xml +75 -0
  1422. data/tests/illformed/sanitize/entry_title_embed.xml +9 -0
  1423. data/tests/illformed/sanitize/entry_title_frame.xml +9 -0
  1424. data/tests/illformed/sanitize/entry_title_iframe.xml +9 -0
  1425. data/tests/illformed/sanitize/entry_title_link.xml +9 -0
  1426. data/tests/illformed/sanitize/entry_title_meta.xml +9 -0
  1427. data/tests/illformed/sanitize/entry_title_object.xml +9 -0
  1428. data/tests/illformed/sanitize/entry_title_onabort.xml +9 -0
  1429. data/tests/illformed/sanitize/entry_title_onblur.xml +9 -0
  1430. data/tests/illformed/sanitize/entry_title_onchange.xml +9 -0
  1431. data/tests/illformed/sanitize/entry_title_onclick.xml +9 -0
  1432. data/tests/illformed/sanitize/entry_title_ondblclick.xml +9 -0
  1433. data/tests/illformed/sanitize/entry_title_onerror.xml +9 -0
  1434. data/tests/illformed/sanitize/entry_title_onfocus.xml +9 -0
  1435. data/tests/illformed/sanitize/entry_title_onkeydown.xml +9 -0
  1436. data/tests/illformed/sanitize/entry_title_onkeypress.xml +9 -0
  1437. data/tests/illformed/sanitize/entry_title_onkeyup.xml +9 -0
  1438. data/tests/illformed/sanitize/entry_title_onload.xml +9 -0
  1439. data/tests/illformed/sanitize/entry_title_onmousedown.xml +9 -0
  1440. data/tests/illformed/sanitize/entry_title_onmouseout.xml +9 -0
  1441. data/tests/illformed/sanitize/entry_title_onmouseover.xml +9 -0
  1442. data/tests/illformed/sanitize/entry_title_onmouseup.xml +9 -0
  1443. data/tests/illformed/sanitize/entry_title_onreset.xml +9 -0
  1444. data/tests/illformed/sanitize/entry_title_onresize.xml +9 -0
  1445. data/tests/illformed/sanitize/entry_title_onsubmit.xml +9 -0
  1446. data/tests/illformed/sanitize/entry_title_onunload.xml +9 -0
  1447. data/tests/illformed/sanitize/entry_title_script.xml +9 -0
  1448. data/tests/illformed/sanitize/entry_title_script_cdata.xml +9 -0
  1449. data/tests/illformed/sanitize/entry_title_script_inline.xml +9 -0
  1450. data/tests/illformed/sanitize/entry_title_style.xml +9 -0
  1451. data/tests/illformed/sanitize/feed_copyright_applet.xml +7 -0
  1452. data/tests/illformed/sanitize/feed_copyright_blink.xml +7 -0
  1453. data/tests/illformed/sanitize/feed_copyright_crazy.xml +73 -0
  1454. data/tests/illformed/sanitize/feed_copyright_embed.xml +7 -0
  1455. data/tests/illformed/sanitize/feed_copyright_frame.xml +7 -0
  1456. data/tests/illformed/sanitize/feed_copyright_iframe.xml +7 -0
  1457. data/tests/illformed/sanitize/feed_copyright_link.xml +7 -0
  1458. data/tests/illformed/sanitize/feed_copyright_meta.xml +7 -0
  1459. data/tests/illformed/sanitize/feed_copyright_object.xml +7 -0
  1460. data/tests/illformed/sanitize/feed_copyright_onabort.xml +7 -0
  1461. data/tests/illformed/sanitize/feed_copyright_onblur.xml +7 -0
  1462. data/tests/illformed/sanitize/feed_copyright_onchange.xml +7 -0
  1463. data/tests/illformed/sanitize/feed_copyright_onclick.xml +7 -0
  1464. data/tests/illformed/sanitize/feed_copyright_ondblclick.xml +7 -0
  1465. data/tests/illformed/sanitize/feed_copyright_onerror.xml +7 -0
  1466. data/tests/illformed/sanitize/feed_copyright_onfocus.xml +7 -0
  1467. data/tests/illformed/sanitize/feed_copyright_onkeydown.xml +7 -0
  1468. data/tests/illformed/sanitize/feed_copyright_onkeypress.xml +7 -0
  1469. data/tests/illformed/sanitize/feed_copyright_onkeyup.xml +7 -0
  1470. data/tests/illformed/sanitize/feed_copyright_onload.xml +7 -0
  1471. data/tests/illformed/sanitize/feed_copyright_onmousedown.xml +7 -0
  1472. data/tests/illformed/sanitize/feed_copyright_onmouseout.xml +7 -0
  1473. data/tests/illformed/sanitize/feed_copyright_onmouseover.xml +7 -0
  1474. data/tests/illformed/sanitize/feed_copyright_onmouseup.xml +7 -0
  1475. data/tests/illformed/sanitize/feed_copyright_onreset.xml +7 -0
  1476. data/tests/illformed/sanitize/feed_copyright_onresize.xml +7 -0
  1477. data/tests/illformed/sanitize/feed_copyright_onsubmit.xml +7 -0
  1478. data/tests/illformed/sanitize/feed_copyright_onunload.xml +7 -0
  1479. data/tests/illformed/sanitize/feed_copyright_script.xml +7 -0
  1480. data/tests/illformed/sanitize/feed_copyright_script_cdata.xml +7 -0
  1481. data/tests/illformed/sanitize/feed_copyright_script_inline.xml +7 -0
  1482. data/tests/illformed/sanitize/feed_copyright_style.xml +7 -0
  1483. data/tests/illformed/sanitize/feed_info_applet.xml +7 -0
  1484. data/tests/illformed/sanitize/feed_info_blink.xml +7 -0
  1485. data/tests/illformed/sanitize/feed_info_crazy.xml +73 -0
  1486. data/tests/illformed/sanitize/feed_info_embed.xml +7 -0
  1487. data/tests/illformed/sanitize/feed_info_frame.xml +7 -0
  1488. data/tests/illformed/sanitize/feed_info_iframe.xml +7 -0
  1489. data/tests/illformed/sanitize/feed_info_link.xml +7 -0
  1490. data/tests/illformed/sanitize/feed_info_meta.xml +7 -0
  1491. data/tests/illformed/sanitize/feed_info_object.xml +7 -0
  1492. data/tests/illformed/sanitize/feed_info_onabort.xml +7 -0
  1493. data/tests/illformed/sanitize/feed_info_onblur.xml +7 -0
  1494. data/tests/illformed/sanitize/feed_info_onchange.xml +7 -0
  1495. data/tests/illformed/sanitize/feed_info_onclick.xml +7 -0
  1496. data/tests/illformed/sanitize/feed_info_ondblclick.xml +7 -0
  1497. data/tests/illformed/sanitize/feed_info_onerror.xml +7 -0
  1498. data/tests/illformed/sanitize/feed_info_onfocus.xml +7 -0
  1499. data/tests/illformed/sanitize/feed_info_onkeydown.xml +7 -0
  1500. data/tests/illformed/sanitize/feed_info_onkeypress.xml +7 -0
  1501. data/tests/illformed/sanitize/feed_info_onkeyup.xml +7 -0
  1502. data/tests/illformed/sanitize/feed_info_onload.xml +7 -0
  1503. data/tests/illformed/sanitize/feed_info_onmousedown.xml +7 -0
  1504. data/tests/illformed/sanitize/feed_info_onmouseout.xml +7 -0
  1505. data/tests/illformed/sanitize/feed_info_onmouseover.xml +7 -0
  1506. data/tests/illformed/sanitize/feed_info_onmouseup.xml +7 -0
  1507. data/tests/illformed/sanitize/feed_info_onreset.xml +7 -0
  1508. data/tests/illformed/sanitize/feed_info_onresize.xml +7 -0
  1509. data/tests/illformed/sanitize/feed_info_onsubmit.xml +7 -0
  1510. data/tests/illformed/sanitize/feed_info_onunload.xml +7 -0
  1511. data/tests/illformed/sanitize/feed_info_script.xml +7 -0
  1512. data/tests/illformed/sanitize/feed_info_script_cdata.xml +7 -0
  1513. data/tests/illformed/sanitize/feed_info_script_inline.xml +7 -0
  1514. data/tests/illformed/sanitize/feed_info_style.xml +7 -0
  1515. data/tests/illformed/sanitize/feed_subtitle_applet.xml +7 -0
  1516. data/tests/illformed/sanitize/feed_subtitle_blink.xml +7 -0
  1517. data/tests/illformed/sanitize/feed_subtitle_crazy.xml +73 -0
  1518. data/tests/illformed/sanitize/feed_subtitle_embed.xml +7 -0
  1519. data/tests/illformed/sanitize/feed_subtitle_frame.xml +7 -0
  1520. data/tests/illformed/sanitize/feed_subtitle_iframe.xml +7 -0
  1521. data/tests/illformed/sanitize/feed_subtitle_link.xml +7 -0
  1522. data/tests/illformed/sanitize/feed_subtitle_meta.xml +7 -0
  1523. data/tests/illformed/sanitize/feed_subtitle_object.xml +7 -0
  1524. data/tests/illformed/sanitize/feed_subtitle_onabort.xml +7 -0
  1525. data/tests/illformed/sanitize/feed_subtitle_onblur.xml +7 -0
  1526. data/tests/illformed/sanitize/feed_subtitle_onchange.xml +7 -0
  1527. data/tests/illformed/sanitize/feed_subtitle_onclick.xml +7 -0
  1528. data/tests/illformed/sanitize/feed_subtitle_ondblclick.xml +7 -0
  1529. data/tests/illformed/sanitize/feed_subtitle_onerror.xml +7 -0
  1530. data/tests/illformed/sanitize/feed_subtitle_onfocus.xml +7 -0
  1531. data/tests/illformed/sanitize/feed_subtitle_onkeydown.xml +7 -0
  1532. data/tests/illformed/sanitize/feed_subtitle_onkeypress.xml +7 -0
  1533. data/tests/illformed/sanitize/feed_subtitle_onkeyup.xml +7 -0
  1534. data/tests/illformed/sanitize/feed_subtitle_onload.xml +7 -0
  1535. data/tests/illformed/sanitize/feed_subtitle_onmousedown.xml +7 -0
  1536. data/tests/illformed/sanitize/feed_subtitle_onmouseout.xml +7 -0
  1537. data/tests/illformed/sanitize/feed_subtitle_onmouseover.xml +7 -0
  1538. data/tests/illformed/sanitize/feed_subtitle_onmouseup.xml +7 -0
  1539. data/tests/illformed/sanitize/feed_subtitle_onreset.xml +7 -0
  1540. data/tests/illformed/sanitize/feed_subtitle_onresize.xml +7 -0
  1541. data/tests/illformed/sanitize/feed_subtitle_onsubmit.xml +7 -0
  1542. data/tests/illformed/sanitize/feed_subtitle_onunload.xml +7 -0
  1543. data/tests/illformed/sanitize/feed_subtitle_script.xml +7 -0
  1544. data/tests/illformed/sanitize/feed_subtitle_script_cdata.xml +7 -0
  1545. data/tests/illformed/sanitize/feed_subtitle_script_inline.xml +7 -0
  1546. data/tests/illformed/sanitize/feed_subtitle_style.xml +7 -0
  1547. data/tests/illformed/sanitize/feed_tagline_applet.xml +7 -0
  1548. data/tests/illformed/sanitize/feed_tagline_blink.xml +7 -0
  1549. data/tests/illformed/sanitize/feed_tagline_crazy.xml +73 -0
  1550. data/tests/illformed/sanitize/feed_tagline_embed.xml +7 -0
  1551. data/tests/illformed/sanitize/feed_tagline_frame.xml +7 -0
  1552. data/tests/illformed/sanitize/feed_tagline_iframe.xml +7 -0
  1553. data/tests/illformed/sanitize/feed_tagline_link.xml +7 -0
  1554. data/tests/illformed/sanitize/feed_tagline_meta.xml +7 -0
  1555. data/tests/illformed/sanitize/feed_tagline_object.xml +7 -0
  1556. data/tests/illformed/sanitize/feed_tagline_onabort.xml +7 -0
  1557. data/tests/illformed/sanitize/feed_tagline_onblur.xml +7 -0
  1558. data/tests/illformed/sanitize/feed_tagline_onchange.xml +7 -0
  1559. data/tests/illformed/sanitize/feed_tagline_onclick.xml +7 -0
  1560. data/tests/illformed/sanitize/feed_tagline_ondblclick.xml +7 -0
  1561. data/tests/illformed/sanitize/feed_tagline_onerror.xml +7 -0
  1562. data/tests/illformed/sanitize/feed_tagline_onfocus.xml +7 -0
  1563. data/tests/illformed/sanitize/feed_tagline_onkeydown.xml +7 -0
  1564. data/tests/illformed/sanitize/feed_tagline_onkeypress.xml +7 -0
  1565. data/tests/illformed/sanitize/feed_tagline_onkeyup.xml +7 -0
  1566. data/tests/illformed/sanitize/feed_tagline_onload.xml +7 -0
  1567. data/tests/illformed/sanitize/feed_tagline_onmousedown.xml +7 -0
  1568. data/tests/illformed/sanitize/feed_tagline_onmouseout.xml +7 -0
  1569. data/tests/illformed/sanitize/feed_tagline_onmouseover.xml +7 -0
  1570. data/tests/illformed/sanitize/feed_tagline_onmouseup.xml +7 -0
  1571. data/tests/illformed/sanitize/feed_tagline_onreset.xml +7 -0
  1572. data/tests/illformed/sanitize/feed_tagline_onresize.xml +7 -0
  1573. data/tests/illformed/sanitize/feed_tagline_onsubmit.xml +7 -0
  1574. data/tests/illformed/sanitize/feed_tagline_onunload.xml +7 -0
  1575. data/tests/illformed/sanitize/feed_tagline_script.xml +7 -0
  1576. data/tests/illformed/sanitize/feed_tagline_script_cdata.xml +7 -0
  1577. data/tests/illformed/sanitize/feed_tagline_script_inline.xml +7 -0
  1578. data/tests/illformed/sanitize/feed_tagline_script_map_description.xml +7 -0
  1579. data/tests/illformed/sanitize/feed_tagline_style.xml +7 -0
  1580. data/tests/illformed/sanitize/feed_title_applet.xml +7 -0
  1581. data/tests/illformed/sanitize/feed_title_blink.xml +7 -0
  1582. data/tests/illformed/sanitize/feed_title_crazy.xml +73 -0
  1583. data/tests/illformed/sanitize/feed_title_embed.xml +7 -0
  1584. data/tests/illformed/sanitize/feed_title_frame.xml +7 -0
  1585. data/tests/illformed/sanitize/feed_title_iframe.xml +7 -0
  1586. data/tests/illformed/sanitize/feed_title_link.xml +7 -0
  1587. data/tests/illformed/sanitize/feed_title_meta.xml +7 -0
  1588. data/tests/illformed/sanitize/feed_title_object.xml +7 -0
  1589. data/tests/illformed/sanitize/feed_title_onabort.xml +7 -0
  1590. data/tests/illformed/sanitize/feed_title_onblur.xml +7 -0
  1591. data/tests/illformed/sanitize/feed_title_onchange.xml +7 -0
  1592. data/tests/illformed/sanitize/feed_title_onclick.xml +7 -0
  1593. data/tests/illformed/sanitize/feed_title_ondblclick.xml +7 -0
  1594. data/tests/illformed/sanitize/feed_title_onerror.xml +7 -0
  1595. data/tests/illformed/sanitize/feed_title_onfocus.xml +7 -0
  1596. data/tests/illformed/sanitize/feed_title_onkeydown.xml +7 -0
  1597. data/tests/illformed/sanitize/feed_title_onkeypress.xml +7 -0
  1598. data/tests/illformed/sanitize/feed_title_onkeyup.xml +7 -0
  1599. data/tests/illformed/sanitize/feed_title_onload.xml +7 -0
  1600. data/tests/illformed/sanitize/feed_title_onmousedown.xml +7 -0
  1601. data/tests/illformed/sanitize/feed_title_onmouseout.xml +7 -0
  1602. data/tests/illformed/sanitize/feed_title_onmouseover.xml +7 -0
  1603. data/tests/illformed/sanitize/feed_title_onmouseup.xml +7 -0
  1604. data/tests/illformed/sanitize/feed_title_onreset.xml +7 -0
  1605. data/tests/illformed/sanitize/feed_title_onresize.xml +7 -0
  1606. data/tests/illformed/sanitize/feed_title_onsubmit.xml +7 -0
  1607. data/tests/illformed/sanitize/feed_title_onunload.xml +7 -0
  1608. data/tests/illformed/sanitize/feed_title_script.xml +7 -0
  1609. data/tests/illformed/sanitize/feed_title_script_cdata.xml +7 -0
  1610. data/tests/illformed/sanitize/feed_title_script_inline.xml +7 -0
  1611. data/tests/illformed/sanitize/feed_title_style.xml +7 -0
  1612. data/tests/illformed/sanitize/item_body_applet.xml +11 -0
  1613. data/tests/illformed/sanitize/item_body_blink.xml +11 -0
  1614. data/tests/illformed/sanitize/item_body_embed.xml +11 -0
  1615. data/tests/illformed/sanitize/item_body_frame.xml +11 -0
  1616. data/tests/illformed/sanitize/item_body_iframe.xml +11 -0
  1617. data/tests/illformed/sanitize/item_body_link.xml +11 -0
  1618. data/tests/illformed/sanitize/item_body_meta.xml +11 -0
  1619. data/tests/illformed/sanitize/item_body_object.xml +11 -0
  1620. data/tests/illformed/sanitize/item_body_onabort.xml +11 -0
  1621. data/tests/illformed/sanitize/item_body_onblur.xml +11 -0
  1622. data/tests/illformed/sanitize/item_body_onchange.xml +11 -0
  1623. data/tests/illformed/sanitize/item_body_onclick.xml +11 -0
  1624. data/tests/illformed/sanitize/item_body_ondblclick.xml +11 -0
  1625. data/tests/illformed/sanitize/item_body_onerror.xml +11 -0
  1626. data/tests/illformed/sanitize/item_body_onfocus.xml +11 -0
  1627. data/tests/illformed/sanitize/item_body_onkeydown.xml +11 -0
  1628. data/tests/illformed/sanitize/item_body_onkeypress.xml +11 -0
  1629. data/tests/illformed/sanitize/item_body_onkeyup.xml +11 -0
  1630. data/tests/illformed/sanitize/item_body_onload.xml +11 -0
  1631. data/tests/illformed/sanitize/item_body_onmousedown.xml +11 -0
  1632. data/tests/illformed/sanitize/item_body_onmouseout.xml +11 -0
  1633. data/tests/illformed/sanitize/item_body_onmouseover.xml +11 -0
  1634. data/tests/illformed/sanitize/item_body_onmouseup.xml +11 -0
  1635. data/tests/illformed/sanitize/item_body_onreset.xml +11 -0
  1636. data/tests/illformed/sanitize/item_body_onresize.xml +11 -0
  1637. data/tests/illformed/sanitize/item_body_onsubmit.xml +11 -0
  1638. data/tests/illformed/sanitize/item_body_onunload.xml +11 -0
  1639. data/tests/illformed/sanitize/item_body_script.xml +11 -0
  1640. data/tests/illformed/sanitize/item_body_script_map_content.xml +11 -0
  1641. data/tests/illformed/sanitize/item_body_style.xml +11 -0
  1642. data/tests/illformed/sanitize/item_content_encoded_applet.xml +11 -0
  1643. data/tests/illformed/sanitize/item_content_encoded_blink.xml +11 -0
  1644. data/tests/illformed/sanitize/item_content_encoded_crazy.xml +77 -0
  1645. data/tests/illformed/sanitize/item_content_encoded_embed.xml +11 -0
  1646. data/tests/illformed/sanitize/item_content_encoded_frame.xml +11 -0
  1647. data/tests/illformed/sanitize/item_content_encoded_iframe.xml +11 -0
  1648. data/tests/illformed/sanitize/item_content_encoded_link.xml +11 -0
  1649. data/tests/illformed/sanitize/item_content_encoded_map_content.xml +11 -0
  1650. data/tests/illformed/sanitize/item_content_encoded_meta.xml +11 -0
  1651. data/tests/illformed/sanitize/item_content_encoded_object.xml +11 -0
  1652. data/tests/illformed/sanitize/item_content_encoded_onabort.xml +11 -0
  1653. data/tests/illformed/sanitize/item_content_encoded_onblur.xml +11 -0
  1654. data/tests/illformed/sanitize/item_content_encoded_onchange.xml +11 -0
  1655. data/tests/illformed/sanitize/item_content_encoded_onclick.xml +11 -0
  1656. data/tests/illformed/sanitize/item_content_encoded_ondblclick.xml +11 -0
  1657. data/tests/illformed/sanitize/item_content_encoded_onerror.xml +11 -0
  1658. data/tests/illformed/sanitize/item_content_encoded_onfocus.xml +11 -0
  1659. data/tests/illformed/sanitize/item_content_encoded_onkeydown.xml +11 -0
  1660. data/tests/illformed/sanitize/item_content_encoded_onkeypress.xml +11 -0
  1661. data/tests/illformed/sanitize/item_content_encoded_onkeyup.xml +11 -0
  1662. data/tests/illformed/sanitize/item_content_encoded_onload.xml +11 -0
  1663. data/tests/illformed/sanitize/item_content_encoded_onmousedown.xml +11 -0
  1664. data/tests/illformed/sanitize/item_content_encoded_onmouseout.xml +11 -0
  1665. data/tests/illformed/sanitize/item_content_encoded_onmouseover.xml +11 -0
  1666. data/tests/illformed/sanitize/item_content_encoded_onmouseup.xml +11 -0
  1667. data/tests/illformed/sanitize/item_content_encoded_onreset.xml +11 -0
  1668. data/tests/illformed/sanitize/item_content_encoded_onresize.xml +11 -0
  1669. data/tests/illformed/sanitize/item_content_encoded_onsubmit.xml +11 -0
  1670. data/tests/illformed/sanitize/item_content_encoded_onunload.xml +11 -0
  1671. data/tests/illformed/sanitize/item_content_encoded_script.xml +11 -0
  1672. data/tests/illformed/sanitize/item_content_encoded_script_cdata.xml +11 -0
  1673. data/tests/illformed/sanitize/item_content_encoded_script_map_content.xml +11 -0
  1674. data/tests/illformed/sanitize/item_content_encoded_style.xml +11 -0
  1675. data/tests/illformed/sanitize/item_description_applet.xml +11 -0
  1676. data/tests/illformed/sanitize/item_description_blink.xml +11 -0
  1677. data/tests/illformed/sanitize/item_description_crazy.xml +81 -0
  1678. data/tests/illformed/sanitize/item_description_embed.xml +11 -0
  1679. data/tests/illformed/sanitize/item_description_frame.xml +11 -0
  1680. data/tests/illformed/sanitize/item_description_iframe.xml +11 -0
  1681. data/tests/illformed/sanitize/item_description_link.xml +11 -0
  1682. data/tests/illformed/sanitize/item_description_meta.xml +11 -0
  1683. data/tests/illformed/sanitize/item_description_object.xml +11 -0
  1684. data/tests/illformed/sanitize/item_description_onabort.xml +11 -0
  1685. data/tests/illformed/sanitize/item_description_onblur.xml +11 -0
  1686. data/tests/illformed/sanitize/item_description_onchange.xml +11 -0
  1687. data/tests/illformed/sanitize/item_description_onclick.xml +11 -0
  1688. data/tests/illformed/sanitize/item_description_ondblclick.xml +11 -0
  1689. data/tests/illformed/sanitize/item_description_onerror.xml +11 -0
  1690. data/tests/illformed/sanitize/item_description_onfocus.xml +11 -0
  1691. data/tests/illformed/sanitize/item_description_onkeydown.xml +11 -0
  1692. data/tests/illformed/sanitize/item_description_onkeypress.xml +11 -0
  1693. data/tests/illformed/sanitize/item_description_onkeyup.xml +11 -0
  1694. data/tests/illformed/sanitize/item_description_onload.xml +11 -0
  1695. data/tests/illformed/sanitize/item_description_onmousedown.xml +11 -0
  1696. data/tests/illformed/sanitize/item_description_onmouseout.xml +11 -0
  1697. data/tests/illformed/sanitize/item_description_onmouseover.xml +11 -0
  1698. data/tests/illformed/sanitize/item_description_onmouseup.xml +11 -0
  1699. data/tests/illformed/sanitize/item_description_onreset.xml +11 -0
  1700. data/tests/illformed/sanitize/item_description_onresize.xml +11 -0
  1701. data/tests/illformed/sanitize/item_description_onsubmit.xml +11 -0
  1702. data/tests/illformed/sanitize/item_description_onunload.xml +11 -0
  1703. data/tests/illformed/sanitize/item_description_script.xml +11 -0
  1704. data/tests/illformed/sanitize/item_description_script_cdata.xml +11 -0
  1705. data/tests/illformed/sanitize/item_description_script_map_summary.xml +11 -0
  1706. data/tests/illformed/sanitize/item_description_style.xml +11 -0
  1707. data/tests/illformed/sanitize/item_fullitem_applet.xml +11 -0
  1708. data/tests/illformed/sanitize/item_fullitem_blink.xml +11 -0
  1709. data/tests/illformed/sanitize/item_fullitem_crazy.xml +77 -0
  1710. data/tests/illformed/sanitize/item_fullitem_embed.xml +11 -0
  1711. data/tests/illformed/sanitize/item_fullitem_frame.xml +11 -0
  1712. data/tests/illformed/sanitize/item_fullitem_iframe.xml +11 -0
  1713. data/tests/illformed/sanitize/item_fullitem_link.xml +11 -0
  1714. data/tests/illformed/sanitize/item_fullitem_meta.xml +11 -0
  1715. data/tests/illformed/sanitize/item_fullitem_object.xml +11 -0
  1716. data/tests/illformed/sanitize/item_fullitem_onabort.xml +11 -0
  1717. data/tests/illformed/sanitize/item_fullitem_onblur.xml +11 -0
  1718. data/tests/illformed/sanitize/item_fullitem_onchange.xml +11 -0
  1719. data/tests/illformed/sanitize/item_fullitem_onclick.xml +11 -0
  1720. data/tests/illformed/sanitize/item_fullitem_ondblclick.xml +11 -0
  1721. data/tests/illformed/sanitize/item_fullitem_onerror.xml +11 -0
  1722. data/tests/illformed/sanitize/item_fullitem_onfocus.xml +11 -0
  1723. data/tests/illformed/sanitize/item_fullitem_onkeydown.xml +11 -0
  1724. data/tests/illformed/sanitize/item_fullitem_onkeypress.xml +11 -0
  1725. data/tests/illformed/sanitize/item_fullitem_onkeyup.xml +11 -0
  1726. data/tests/illformed/sanitize/item_fullitem_onload.xml +11 -0
  1727. data/tests/illformed/sanitize/item_fullitem_onmousedown.xml +11 -0
  1728. data/tests/illformed/sanitize/item_fullitem_onmouseout.xml +11 -0
  1729. data/tests/illformed/sanitize/item_fullitem_onmouseover.xml +11 -0
  1730. data/tests/illformed/sanitize/item_fullitem_onmouseup.xml +11 -0
  1731. data/tests/illformed/sanitize/item_fullitem_onreset.xml +11 -0
  1732. data/tests/illformed/sanitize/item_fullitem_onresize.xml +11 -0
  1733. data/tests/illformed/sanitize/item_fullitem_onsubmit.xml +11 -0
  1734. data/tests/illformed/sanitize/item_fullitem_onunload.xml +11 -0
  1735. data/tests/illformed/sanitize/item_fullitem_script.xml +11 -0
  1736. data/tests/illformed/sanitize/item_fullitem_script_cdata.xml +11 -0
  1737. data/tests/illformed/sanitize/item_fullitem_script_map_summary.xml +11 -0
  1738. data/tests/illformed/sanitize/item_fullitem_style.xml +11 -0
  1739. data/tests/illformed/sanitize/item_xhtml_body_applet.xml +11 -0
  1740. data/tests/illformed/sanitize/item_xhtml_body_blink.xml +11 -0
  1741. data/tests/illformed/sanitize/item_xhtml_body_embed.xml +11 -0
  1742. data/tests/illformed/sanitize/item_xhtml_body_frame.xml +11 -0
  1743. data/tests/illformed/sanitize/item_xhtml_body_iframe.xml +11 -0
  1744. data/tests/illformed/sanitize/item_xhtml_body_link.xml +11 -0
  1745. data/tests/illformed/sanitize/item_xhtml_body_meta.xml +11 -0
  1746. data/tests/illformed/sanitize/item_xhtml_body_object.xml +11 -0
  1747. data/tests/illformed/sanitize/item_xhtml_body_onabort.xml +11 -0
  1748. data/tests/illformed/sanitize/item_xhtml_body_onblur.xml +11 -0
  1749. data/tests/illformed/sanitize/item_xhtml_body_onchange.xml +11 -0
  1750. data/tests/illformed/sanitize/item_xhtml_body_onclick.xml +11 -0
  1751. data/tests/illformed/sanitize/item_xhtml_body_ondblclick.xml +11 -0
  1752. data/tests/illformed/sanitize/item_xhtml_body_onerror.xml +11 -0
  1753. data/tests/illformed/sanitize/item_xhtml_body_onfocus.xml +11 -0
  1754. data/tests/illformed/sanitize/item_xhtml_body_onkeydown.xml +11 -0
  1755. data/tests/illformed/sanitize/item_xhtml_body_onkeypress.xml +11 -0
  1756. data/tests/illformed/sanitize/item_xhtml_body_onkeyup.xml +11 -0
  1757. data/tests/illformed/sanitize/item_xhtml_body_onload.xml +11 -0
  1758. data/tests/illformed/sanitize/item_xhtml_body_onmousedown.xml +11 -0
  1759. data/tests/illformed/sanitize/item_xhtml_body_onmouseout.xml +11 -0
  1760. data/tests/illformed/sanitize/item_xhtml_body_onmouseover.xml +11 -0
  1761. data/tests/illformed/sanitize/item_xhtml_body_onmouseup.xml +11 -0
  1762. data/tests/illformed/sanitize/item_xhtml_body_onreset.xml +11 -0
  1763. data/tests/illformed/sanitize/item_xhtml_body_onresize.xml +11 -0
  1764. data/tests/illformed/sanitize/item_xhtml_body_onsubmit.xml +11 -0
  1765. data/tests/illformed/sanitize/item_xhtml_body_onunload.xml +11 -0
  1766. data/tests/illformed/sanitize/item_xhtml_body_script.xml +11 -0
  1767. data/tests/illformed/sanitize/item_xhtml_body_script_map_content.xml +11 -0
  1768. data/tests/illformed/sanitize/item_xhtml_body_style.xml +11 -0
  1769. data/tests/wellformed/amp/amp01.xml +9 -0
  1770. data/tests/wellformed/amp/amp02.xml +9 -0
  1771. data/tests/wellformed/amp/amp03.xml +9 -0
  1772. data/tests/wellformed/amp/amp04.xml +9 -0
  1773. data/tests/wellformed/amp/amp05.xml +9 -0
  1774. data/tests/wellformed/amp/amp06.xml +9 -0
  1775. data/tests/wellformed/amp/amp07.xml +9 -0
  1776. data/tests/wellformed/amp/amp08.xml +9 -0
  1777. data/tests/wellformed/amp/amp09.xml +9 -0
  1778. data/tests/wellformed/amp/amp10.xml +9 -0
  1779. data/tests/wellformed/amp/amp11.xml +9 -0
  1780. data/tests/wellformed/amp/amp12.xml +9 -0
  1781. data/tests/wellformed/amp/amp13.xml +9 -0
  1782. data/tests/wellformed/amp/amp14.xml +9 -0
  1783. data/tests/wellformed/amp/amp15.xml +9 -0
  1784. data/tests/wellformed/amp/amp16.xml +9 -0
  1785. data/tests/wellformed/amp/amp17.xml +9 -0
  1786. data/tests/wellformed/amp/amp18.xml +9 -0
  1787. data/tests/wellformed/amp/amp19.xml +9 -0
  1788. data/tests/wellformed/amp/amp20.xml +9 -0
  1789. data/tests/wellformed/amp/amp21.xml +9 -0
  1790. data/tests/wellformed/amp/amp22.xml +9 -0
  1791. data/tests/wellformed/amp/amp23.xml +9 -0
  1792. data/tests/wellformed/amp/amp24.xml +9 -0
  1793. data/tests/wellformed/amp/amp25.xml +9 -0
  1794. data/tests/wellformed/amp/amp26.xml +9 -0
  1795. data/tests/wellformed/amp/amp27.xml +9 -0
  1796. data/tests/wellformed/amp/amp28.xml +9 -0
  1797. data/tests/wellformed/amp/amp29.xml +9 -0
  1798. data/tests/wellformed/amp/amp30.xml +9 -0
  1799. data/tests/wellformed/amp/amp31.xml +9 -0
  1800. data/tests/wellformed/amp/amp32.xml +9 -0
  1801. data/tests/wellformed/amp/amp33.xml +9 -0
  1802. data/tests/wellformed/amp/amp34.xml +9 -0
  1803. data/tests/wellformed/amp/amp35.xml +9 -0
  1804. data/tests/wellformed/amp/amp36.xml +9 -0
  1805. data/tests/wellformed/amp/amp37.xml +9 -0
  1806. data/tests/wellformed/amp/amp38.xml +9 -0
  1807. data/tests/wellformed/amp/amp39.xml +9 -0
  1808. data/tests/wellformed/amp/amp40.xml +9 -0
  1809. data/tests/wellformed/amp/amp41.xml +9 -0
  1810. data/tests/wellformed/amp/amp42.xml +9 -0
  1811. data/tests/wellformed/amp/amp43.xml +9 -0
  1812. data/tests/wellformed/amp/amp44.xml +9 -0
  1813. data/tests/wellformed/amp/amp45.xml +9 -0
  1814. data/tests/wellformed/amp/amp46.xml +9 -0
  1815. data/tests/wellformed/amp/amp47.xml +9 -0
  1816. data/tests/wellformed/amp/amp48.xml +9 -0
  1817. data/tests/wellformed/amp/amp49.xml +9 -0
  1818. data/tests/wellformed/amp/amp50.xml +9 -0
  1819. data/tests/wellformed/amp/amp51.xml +9 -0
  1820. data/tests/wellformed/amp/amp52.xml +9 -0
  1821. data/tests/wellformed/amp/amp53.xml +9 -0
  1822. data/tests/wellformed/amp/amp54.xml +9 -0
  1823. data/tests/wellformed/amp/amp55.xml +9 -0
  1824. data/tests/wellformed/amp/amp56.xml +9 -0
  1825. data/tests/wellformed/amp/amp57.xml +9 -0
  1826. data/tests/wellformed/amp/amp58.xml +9 -0
  1827. data/tests/wellformed/amp/amp59.xml +9 -0
  1828. data/tests/wellformed/amp/amp60.xml +9 -0
  1829. data/tests/wellformed/amp/amp61.xml +9 -0
  1830. data/tests/wellformed/amp/amp62.xml +9 -0
  1831. data/tests/wellformed/amp/amp63.xml +9 -0
  1832. data/tests/wellformed/amp/amp64.xml +9 -0
  1833. data/tests/wellformed/atom/atom_namespace_1.xml +7 -0
  1834. data/tests/wellformed/atom/atom_namespace_2.xml +7 -0
  1835. data/tests/wellformed/atom/atom_namespace_3.xml +7 -0
  1836. data/tests/wellformed/atom/atom_namespace_4.xml +7 -0
  1837. data/tests/wellformed/atom/atom_namespace_5.xml +7 -0
  1838. data/tests/wellformed/atom/entry_author_email.xml +13 -0
  1839. data/tests/wellformed/atom/entry_author_homepage.xml +13 -0
  1840. data/tests/wellformed/atom/entry_author_map_author.xml +13 -0
  1841. data/tests/wellformed/atom/entry_author_map_author_2.xml +12 -0
  1842. data/tests/wellformed/atom/entry_author_name.xml +13 -0
  1843. data/tests/wellformed/atom/entry_author_uri.xml +13 -0
  1844. data/tests/wellformed/atom/entry_author_url.xml +13 -0
  1845. data/tests/wellformed/atom/entry_content_mode_base64.xml +11 -0
  1846. data/tests/wellformed/atom/entry_content_mode_escaped.xml +9 -0
  1847. data/tests/wellformed/atom/entry_content_type.xml +9 -0
  1848. data/tests/wellformed/atom/entry_content_type_text_plain.xml +9 -0
  1849. data/tests/wellformed/atom/entry_content_value.xml +9 -0
  1850. data/tests/wellformed/atom/entry_contributor_email.xml +13 -0
  1851. data/tests/wellformed/atom/entry_contributor_homepage.xml +13 -0
  1852. data/tests/wellformed/atom/entry_contributor_multiple.xml +18 -0
  1853. data/tests/wellformed/atom/entry_contributor_name.xml +13 -0
  1854. data/tests/wellformed/atom/entry_contributor_uri.xml +13 -0
  1855. data/tests/wellformed/atom/entry_contributor_url.xml +13 -0
  1856. data/tests/wellformed/atom/entry_id.xml +9 -0
  1857. data/tests/wellformed/atom/entry_id_map_guid.xml +9 -0
  1858. data/tests/wellformed/atom/entry_link_alternate_map_link.xml +9 -0
  1859. data/tests/wellformed/atom/entry_link_alternate_map_link_2.xml +9 -0
  1860. data/tests/wellformed/atom/entry_link_href.xml +9 -0
  1861. data/tests/wellformed/atom/entry_link_multiple.xml +10 -0
  1862. data/tests/wellformed/atom/entry_link_rel.xml +9 -0
  1863. data/tests/wellformed/atom/entry_link_title.xml +9 -0
  1864. data/tests/wellformed/atom/entry_link_type.xml +9 -0
  1865. data/tests/wellformed/atom/entry_summary.xml +9 -0
  1866. data/tests/wellformed/atom/entry_summary_base64.xml +11 -0
  1867. data/tests/wellformed/atom/entry_summary_base64_2.xml +11 -0
  1868. data/tests/wellformed/atom/entry_summary_content_mode_base64.xml +11 -0
  1869. data/tests/wellformed/atom/entry_summary_content_mode_escaped.xml +9 -0
  1870. data/tests/wellformed/atom/entry_summary_content_type.xml +9 -0
  1871. data/tests/wellformed/atom/entry_summary_content_type_text_plain.xml +9 -0
  1872. data/tests/wellformed/atom/entry_summary_content_value.xml +9 -0
  1873. data/tests/wellformed/atom/entry_summary_escaped_markup.xml +9 -0
  1874. data/tests/wellformed/atom/entry_summary_inline_markup.xml +9 -0
  1875. data/tests/wellformed/atom/entry_summary_inline_markup_2.xml +9 -0
  1876. data/tests/wellformed/atom/entry_summary_naked_markup.xml +9 -0
  1877. data/tests/wellformed/atom/entry_summary_text_plain.xml +9 -0
  1878. data/tests/wellformed/atom/entry_title.xml +9 -0
  1879. data/tests/wellformed/atom/entry_title_base64.xml +11 -0
  1880. data/tests/wellformed/atom/entry_title_base64_2.xml +11 -0
  1881. data/tests/wellformed/atom/entry_title_content_mode_base64.xml +11 -0
  1882. data/tests/wellformed/atom/entry_title_content_mode_escaped.xml +9 -0
  1883. data/tests/wellformed/atom/entry_title_content_type.xml +9 -0
  1884. data/tests/wellformed/atom/entry_title_content_type_text_plain.xml +9 -0
  1885. data/tests/wellformed/atom/entry_title_content_value.xml +9 -0
  1886. data/tests/wellformed/atom/entry_title_escaped_markup.xml +9 -0
  1887. data/tests/wellformed/atom/entry_title_inline_markup.xml +9 -0
  1888. data/tests/wellformed/atom/entry_title_inline_markup_2.xml +9 -0
  1889. data/tests/wellformed/atom/entry_title_naked_markup.xml +9 -0
  1890. data/tests/wellformed/atom/entry_title_text_plain.xml +9 -0
  1891. data/tests/wellformed/atom/entry_title_text_plain_brackets.xml +9 -0
  1892. data/tests/wellformed/atom/feed_author_email.xml +11 -0
  1893. data/tests/wellformed/atom/feed_author_homepage.xml +11 -0
  1894. data/tests/wellformed/atom/feed_author_map_author.xml +11 -0
  1895. data/tests/wellformed/atom/feed_author_map_author_2.xml +10 -0
  1896. data/tests/wellformed/atom/feed_author_name.xml +11 -0
  1897. data/tests/wellformed/atom/feed_author_uri.xml +11 -0
  1898. data/tests/wellformed/atom/feed_author_url.xml +11 -0
  1899. data/tests/wellformed/atom/feed_contributor_email.xml +11 -0
  1900. data/tests/wellformed/atom/feed_contributor_homepage.xml +11 -0
  1901. data/tests/wellformed/atom/feed_contributor_multiple.xml +16 -0
  1902. data/tests/wellformed/atom/feed_contributor_name.xml +11 -0
  1903. data/tests/wellformed/atom/feed_contributor_uri.xml +11 -0
  1904. data/tests/wellformed/atom/feed_contributor_url.xml +11 -0
  1905. data/tests/wellformed/atom/feed_copyright.xml +7 -0
  1906. data/tests/wellformed/atom/feed_copyright_base64.xml +9 -0
  1907. data/tests/wellformed/atom/feed_copyright_base64_2.xml +9 -0
  1908. data/tests/wellformed/atom/feed_copyright_content_mode_base64.xml +9 -0
  1909. data/tests/wellformed/atom/feed_copyright_content_mode_escaped.xml +7 -0
  1910. data/tests/wellformed/atom/feed_copyright_content_type.xml +7 -0
  1911. data/tests/wellformed/atom/feed_copyright_content_type_text_plain.xml +7 -0
  1912. data/tests/wellformed/atom/feed_copyright_content_value.xml +7 -0
  1913. data/tests/wellformed/atom/feed_copyright_escaped_markup.xml +7 -0
  1914. data/tests/wellformed/atom/feed_copyright_inline_markup.xml +7 -0
  1915. data/tests/wellformed/atom/feed_copyright_inline_markup_2.xml +7 -0
  1916. data/tests/wellformed/atom/feed_copyright_naked_markup.xml +7 -0
  1917. data/tests/wellformed/atom/feed_copyright_text_plain.xml +7 -0
  1918. data/tests/wellformed/atom/feed_generator.xml +7 -0
  1919. data/tests/wellformed/atom/feed_generator_name.xml +7 -0
  1920. data/tests/wellformed/atom/feed_generator_url.xml +7 -0
  1921. data/tests/wellformed/atom/feed_generator_version.xml +7 -0
  1922. data/tests/wellformed/atom/feed_id.xml +7 -0
  1923. data/tests/wellformed/atom/feed_id_map_guid.xml +7 -0
  1924. data/tests/wellformed/atom/feed_info.xml +7 -0
  1925. data/tests/wellformed/atom/feed_info_base64.xml +9 -0
  1926. data/tests/wellformed/atom/feed_info_base64_2.xml +9 -0
  1927. data/tests/wellformed/atom/feed_info_content_mode_base64.xml +9 -0
  1928. data/tests/wellformed/atom/feed_info_content_mode_escaped.xml +7 -0
  1929. data/tests/wellformed/atom/feed_info_content_type.xml +7 -0
  1930. data/tests/wellformed/atom/feed_info_content_type_text_plain.xml +7 -0
  1931. data/tests/wellformed/atom/feed_info_content_value.xml +7 -0
  1932. data/tests/wellformed/atom/feed_info_escaped_markup.xml +7 -0
  1933. data/tests/wellformed/atom/feed_info_inline_markup.xml +7 -0
  1934. data/tests/wellformed/atom/feed_info_inline_markup_2.xml +7 -0
  1935. data/tests/wellformed/atom/feed_info_naked_markup.xml +7 -0
  1936. data/tests/wellformed/atom/feed_info_text_plain.xml +7 -0
  1937. data/tests/wellformed/atom/feed_link_alternate_map_link.xml +7 -0
  1938. data/tests/wellformed/atom/feed_link_alternate_map_link_2.xml +7 -0
  1939. data/tests/wellformed/atom/feed_link_href.xml +7 -0
  1940. data/tests/wellformed/atom/feed_link_multiple.xml +8 -0
  1941. data/tests/wellformed/atom/feed_link_rel.xml +7 -0
  1942. data/tests/wellformed/atom/feed_link_title.xml +7 -0
  1943. data/tests/wellformed/atom/feed_link_type.xml +7 -0
  1944. data/tests/wellformed/atom/feed_tagline.xml +7 -0
  1945. data/tests/wellformed/atom/feed_tagline_base64.xml +9 -0
  1946. data/tests/wellformed/atom/feed_tagline_base64_2.xml +9 -0
  1947. data/tests/wellformed/atom/feed_tagline_content_mode_base64.xml +9 -0
  1948. data/tests/wellformed/atom/feed_tagline_content_mode_escaped.xml +7 -0
  1949. data/tests/wellformed/atom/feed_tagline_content_type.xml +7 -0
  1950. data/tests/wellformed/atom/feed_tagline_content_type_text_plain.xml +7 -0
  1951. data/tests/wellformed/atom/feed_tagline_content_value.xml +7 -0
  1952. data/tests/wellformed/atom/feed_tagline_escaped_markup.xml +7 -0
  1953. data/tests/wellformed/atom/feed_tagline_inline_markup.xml +7 -0
  1954. data/tests/wellformed/atom/feed_tagline_inline_markup_2.xml +7 -0
  1955. data/tests/wellformed/atom/feed_tagline_naked_markup.xml +7 -0
  1956. data/tests/wellformed/atom/feed_tagline_text_plain.xml +7 -0
  1957. data/tests/wellformed/atom/feed_title.xml +7 -0
  1958. data/tests/wellformed/atom/feed_title_base64.xml +9 -0
  1959. data/tests/wellformed/atom/feed_title_base64_2.xml +9 -0
  1960. data/tests/wellformed/atom/feed_title_content_mode_base64.xml +9 -0
  1961. data/tests/wellformed/atom/feed_title_content_mode_escaped.xml +7 -0
  1962. data/tests/wellformed/atom/feed_title_content_type.xml +7 -0
  1963. data/tests/wellformed/atom/feed_title_content_type_text_plain.xml +7 -0
  1964. data/tests/wellformed/atom/feed_title_content_value.xml +7 -0
  1965. data/tests/wellformed/atom/feed_title_escaped_markup.xml +7 -0
  1966. data/tests/wellformed/atom/feed_title_inline_markup.xml +7 -0
  1967. data/tests/wellformed/atom/feed_title_inline_markup_2.xml +7 -0
  1968. data/tests/wellformed/atom/feed_title_naked_markup.xml +7 -0
  1969. data/tests/wellformed/atom/feed_title_text_plain.xml +7 -0
  1970. data/tests/wellformed/atom/relative_uri.xml +7 -0
  1971. data/tests/wellformed/atom/relative_uri_inherit.xml +7 -0
  1972. data/tests/wellformed/atom/relative_uri_inherit_2.xml +7 -0
  1973. data/tests/wellformed/atom10/atom10_namespace.xml +7 -0
  1974. data/tests/wellformed/atom10/atom10_version.xml +6 -0
  1975. data/tests/wellformed/atom10/entry_author_email.xml +13 -0
  1976. data/tests/wellformed/atom10/entry_author_map_author.xml +13 -0
  1977. data/tests/wellformed/atom10/entry_author_map_author_2.xml +12 -0
  1978. data/tests/wellformed/atom10/entry_author_name.xml +13 -0
  1979. data/tests/wellformed/atom10/entry_author_uri.xml +13 -0
  1980. data/tests/wellformed/atom10/entry_author_url.xml +13 -0
  1981. data/tests/wellformed/atom10/entry_category_label.xml +9 -0
  1982. data/tests/wellformed/atom10/entry_category_scheme.xml +9 -0
  1983. data/tests/wellformed/atom10/entry_category_term.xml +9 -0
  1984. data/tests/wellformed/atom10/entry_content_application_xml.xml +9 -0
  1985. data/tests/wellformed/atom10/entry_content_base64.xml +11 -0
  1986. data/tests/wellformed/atom10/entry_content_base64_2.xml +11 -0
  1987. data/tests/wellformed/atom10/entry_content_escaped_markup.xml +9 -0
  1988. data/tests/wellformed/atom10/entry_content_inline_markup.xml +9 -0
  1989. data/tests/wellformed/atom10/entry_content_inline_markup_2.xml +9 -0
  1990. data/tests/wellformed/atom10/entry_content_src.xml +9 -0
  1991. data/tests/wellformed/atom10/entry_content_text_plain.xml +9 -0
  1992. data/tests/wellformed/atom10/entry_content_text_plain_brackets.xml +9 -0
  1993. data/tests/wellformed/atom10/entry_content_type.xml +9 -0
  1994. data/tests/wellformed/atom10/entry_content_type_text.xml +9 -0
  1995. data/tests/wellformed/atom10/entry_content_value.xml +9 -0
  1996. data/tests/wellformed/atom10/entry_contributor_email.xml +13 -0
  1997. data/tests/wellformed/atom10/entry_contributor_multiple.xml +18 -0
  1998. data/tests/wellformed/atom10/entry_contributor_name.xml +13 -0
  1999. data/tests/wellformed/atom10/entry_contributor_uri.xml +13 -0
  2000. data/tests/wellformed/atom10/entry_contributor_url.xml +13 -0
  2001. data/tests/wellformed/atom10/entry_id.xml +9 -0
  2002. data/tests/wellformed/atom10/entry_id_map_guid.xml +9 -0
  2003. data/tests/wellformed/atom10/entry_id_no_normalization_1.xml +9 -0
  2004. data/tests/wellformed/atom10/entry_id_no_normalization_2.xml +9 -0
  2005. data/tests/wellformed/atom10/entry_id_no_normalization_3.xml +9 -0
  2006. data/tests/wellformed/atom10/entry_id_no_normalization_4.xml +9 -0
  2007. data/tests/wellformed/atom10/entry_id_no_normalization_5.xml +9 -0
  2008. data/tests/wellformed/atom10/entry_id_no_normalization_6.xml +9 -0
  2009. data/tests/wellformed/atom10/entry_id_no_normalization_7.xml +9 -0
  2010. data/tests/wellformed/atom10/entry_link_alternate_map_link.xml +9 -0
  2011. data/tests/wellformed/atom10/entry_link_alternate_map_link_2.xml +9 -0
  2012. data/tests/wellformed/atom10/entry_link_alternate_map_link_3.xml +11 -0
  2013. data/tests/wellformed/atom10/entry_link_href.xml +9 -0
  2014. data/tests/wellformed/atom10/entry_link_hreflang.xml +9 -0
  2015. data/tests/wellformed/atom10/entry_link_length.xml +9 -0
  2016. data/tests/wellformed/atom10/entry_link_multiple.xml +10 -0
  2017. data/tests/wellformed/atom10/entry_link_no_rel.xml +9 -0
  2018. data/tests/wellformed/atom10/entry_link_rel.xml +9 -0
  2019. data/tests/wellformed/atom10/entry_link_rel_enclosure.xml +9 -0
  2020. data/tests/wellformed/atom10/entry_link_rel_enclosure_map_enclosure_length.xml +9 -0
  2021. data/tests/wellformed/atom10/entry_link_rel_enclosure_map_enclosure_type.xml +9 -0
  2022. data/tests/wellformed/atom10/entry_link_rel_enclosure_map_enclosure_url.xml +9 -0
  2023. data/tests/wellformed/atom10/entry_link_rel_other.xml +9 -0
  2024. data/tests/wellformed/atom10/entry_link_rel_related.xml +9 -0
  2025. data/tests/wellformed/atom10/entry_link_rel_self.xml +9 -0
  2026. data/tests/wellformed/atom10/entry_link_rel_via.xml +9 -0
  2027. data/tests/wellformed/atom10/entry_link_title.xml +9 -0
  2028. data/tests/wellformed/atom10/entry_link_type.xml +9 -0
  2029. data/tests/wellformed/atom10/entry_rights.xml +9 -0
  2030. data/tests/wellformed/atom10/entry_rights_content_value.xml +9 -0
  2031. data/tests/wellformed/atom10/entry_rights_escaped_markup.xml +9 -0
  2032. data/tests/wellformed/atom10/entry_rights_inline_markup.xml +9 -0
  2033. data/tests/wellformed/atom10/entry_rights_inline_markup_2.xml +9 -0
  2034. data/tests/wellformed/atom10/entry_rights_text_plain.xml +9 -0
  2035. data/tests/wellformed/atom10/entry_rights_text_plain_brackets.xml +9 -0
  2036. data/tests/wellformed/atom10/entry_rights_type_default.xml +9 -0
  2037. data/tests/wellformed/atom10/entry_rights_type_text.xml +9 -0
  2038. data/tests/wellformed/atom10/entry_source_author_email.xml +15 -0
  2039. data/tests/wellformed/atom10/entry_source_author_map_author.xml +15 -0
  2040. data/tests/wellformed/atom10/entry_source_author_map_author_2.xml +14 -0
  2041. data/tests/wellformed/atom10/entry_source_author_name.xml +15 -0
  2042. data/tests/wellformed/atom10/entry_source_author_uri.xml +15 -0
  2043. data/tests/wellformed/atom10/entry_source_category_label.xml +11 -0
  2044. data/tests/wellformed/atom10/entry_source_category_scheme.xml +11 -0
  2045. data/tests/wellformed/atom10/entry_source_category_term.xml +11 -0
  2046. data/tests/wellformed/atom10/entry_source_contributor_email.xml +15 -0
  2047. data/tests/wellformed/atom10/entry_source_contributor_multiple.xml +20 -0
  2048. data/tests/wellformed/atom10/entry_source_contributor_name.xml +15 -0
  2049. data/tests/wellformed/atom10/entry_source_contributor_uri.xml +15 -0
  2050. data/tests/wellformed/atom10/entry_source_generator.xml +11 -0
  2051. data/tests/wellformed/atom10/entry_source_generator_name.xml +11 -0
  2052. data/tests/wellformed/atom10/entry_source_generator_uri.xml +11 -0
  2053. data/tests/wellformed/atom10/entry_source_generator_version.xml +11 -0
  2054. data/tests/wellformed/atom10/entry_source_icon.xml +11 -0
  2055. data/tests/wellformed/atom10/entry_source_id.xml +11 -0
  2056. data/tests/wellformed/atom10/entry_source_link_alternate_map_link.xml +11 -0
  2057. data/tests/wellformed/atom10/entry_source_link_alternate_map_link_2.xml +11 -0
  2058. data/tests/wellformed/atom10/entry_source_link_href.xml +11 -0
  2059. data/tests/wellformed/atom10/entry_source_link_hreflang.xml +11 -0
  2060. data/tests/wellformed/atom10/entry_source_link_length.xml +11 -0
  2061. data/tests/wellformed/atom10/entry_source_link_multiple.xml +12 -0
  2062. data/tests/wellformed/atom10/entry_source_link_no_rel.xml +11 -0
  2063. data/tests/wellformed/atom10/entry_source_link_rel.xml +11 -0
  2064. data/tests/wellformed/atom10/entry_source_link_rel_other.xml +11 -0
  2065. data/tests/wellformed/atom10/entry_source_link_rel_related.xml +11 -0
  2066. data/tests/wellformed/atom10/entry_source_link_rel_self.xml +11 -0
  2067. data/tests/wellformed/atom10/entry_source_link_rel_via.xml +11 -0
  2068. data/tests/wellformed/atom10/entry_source_link_title.xml +11 -0
  2069. data/tests/wellformed/atom10/entry_source_link_type.xml +11 -0
  2070. data/tests/wellformed/atom10/entry_source_logo.xml +11 -0
  2071. data/tests/wellformed/atom10/entry_source_rights.xml +11 -0
  2072. data/tests/wellformed/atom10/entry_source_rights_base64.xml +13 -0
  2073. data/tests/wellformed/atom10/entry_source_rights_base64_2.xml +13 -0
  2074. data/tests/wellformed/atom10/entry_source_rights_content_type.xml +11 -0
  2075. data/tests/wellformed/atom10/entry_source_rights_content_type_text.xml +11 -0
  2076. data/tests/wellformed/atom10/entry_source_rights_content_value.xml +11 -0
  2077. data/tests/wellformed/atom10/entry_source_rights_escaped_markup.xml +11 -0
  2078. data/tests/wellformed/atom10/entry_source_rights_inline_markup.xml +11 -0
  2079. data/tests/wellformed/atom10/entry_source_rights_inline_markup_2.xml +11 -0
  2080. data/tests/wellformed/atom10/entry_source_rights_text_plain.xml +11 -0
  2081. data/tests/wellformed/atom10/entry_source_subittle_content_type_text.xml +11 -0
  2082. data/tests/wellformed/atom10/entry_source_subtitle.xml +11 -0
  2083. data/tests/wellformed/atom10/entry_source_subtitle_base64.xml +13 -0
  2084. data/tests/wellformed/atom10/entry_source_subtitle_base64_2.xml +13 -0
  2085. data/tests/wellformed/atom10/entry_source_subtitle_content_type.xml +11 -0
  2086. data/tests/wellformed/atom10/entry_source_subtitle_content_value.xml +11 -0
  2087. data/tests/wellformed/atom10/entry_source_subtitle_escaped_markup.xml +11 -0
  2088. data/tests/wellformed/atom10/entry_source_subtitle_inline_markup.xml +11 -0
  2089. data/tests/wellformed/atom10/entry_source_subtitle_inline_markup_2.xml +11 -0
  2090. data/tests/wellformed/atom10/entry_source_subtitle_text_plain.xml +11 -0
  2091. data/tests/wellformed/atom10/entry_source_title.xml +11 -0
  2092. data/tests/wellformed/atom10/entry_source_title_base64.xml +13 -0
  2093. data/tests/wellformed/atom10/entry_source_title_base64_2.xml +13 -0
  2094. data/tests/wellformed/atom10/entry_source_title_content_type.xml +11 -0
  2095. data/tests/wellformed/atom10/entry_source_title_content_type_text.xml +11 -0
  2096. data/tests/wellformed/atom10/entry_source_title_content_value.xml +11 -0
  2097. data/tests/wellformed/atom10/entry_source_title_escaped_markup.xml +11 -0
  2098. data/tests/wellformed/atom10/entry_source_title_inline_markup.xml +11 -0
  2099. data/tests/wellformed/atom10/entry_source_title_inline_markup_2.xml +11 -0
  2100. data/tests/wellformed/atom10/entry_source_title_text_plain.xml +11 -0
  2101. data/tests/wellformed/atom10/entry_summary.xml +9 -0
  2102. data/tests/wellformed/atom10/entry_summary_base64.xml +11 -0
  2103. data/tests/wellformed/atom10/entry_summary_base64_2.xml +11 -0
  2104. data/tests/wellformed/atom10/entry_summary_content_value.xml +9 -0
  2105. data/tests/wellformed/atom10/entry_summary_escaped_markup.xml +9 -0
  2106. data/tests/wellformed/atom10/entry_summary_inline_markup.xml +9 -0
  2107. data/tests/wellformed/atom10/entry_summary_inline_markup_2.xml +9 -0
  2108. data/tests/wellformed/atom10/entry_summary_text_plain.xml +9 -0
  2109. data/tests/wellformed/atom10/entry_summary_type_default.xml +9 -0
  2110. data/tests/wellformed/atom10/entry_summary_type_text.xml +9 -0
  2111. data/tests/wellformed/atom10/entry_title.xml +9 -0
  2112. data/tests/wellformed/atom10/entry_title_base64.xml +11 -0
  2113. data/tests/wellformed/atom10/entry_title_base64_2.xml +11 -0
  2114. data/tests/wellformed/atom10/entry_title_content_value.xml +9 -0
  2115. data/tests/wellformed/atom10/entry_title_escaped_markup.xml +9 -0
  2116. data/tests/wellformed/atom10/entry_title_inline_markup.xml +9 -0
  2117. data/tests/wellformed/atom10/entry_title_inline_markup_2.xml +9 -0
  2118. data/tests/wellformed/atom10/entry_title_text_plain.xml +9 -0
  2119. data/tests/wellformed/atom10/entry_title_text_plain_brackets.xml +9 -0
  2120. data/tests/wellformed/atom10/entry_title_type_default.xml +9 -0
  2121. data/tests/wellformed/atom10/entry_title_type_text.xml +9 -0
  2122. data/tests/wellformed/atom10/feed_author_email.xml +11 -0
  2123. data/tests/wellformed/atom10/feed_author_map_author.xml +11 -0
  2124. data/tests/wellformed/atom10/feed_author_map_author_2.xml +10 -0
  2125. data/tests/wellformed/atom10/feed_author_name.xml +11 -0
  2126. data/tests/wellformed/atom10/feed_author_uri.xml +11 -0
  2127. data/tests/wellformed/atom10/feed_author_url.xml +11 -0
  2128. data/tests/wellformed/atom10/feed_contributor_email.xml +11 -0
  2129. data/tests/wellformed/atom10/feed_contributor_multiple.xml +16 -0
  2130. data/tests/wellformed/atom10/feed_contributor_name.xml +11 -0
  2131. data/tests/wellformed/atom10/feed_contributor_uri.xml +11 -0
  2132. data/tests/wellformed/atom10/feed_contributor_url.xml +11 -0
  2133. data/tests/wellformed/atom10/feed_generator.xml +7 -0
  2134. data/tests/wellformed/atom10/feed_generator_name.xml +7 -0
  2135. data/tests/wellformed/atom10/feed_generator_url.xml +7 -0
  2136. data/tests/wellformed/atom10/feed_generator_version.xml +7 -0
  2137. data/tests/wellformed/atom10/feed_icon.xml +7 -0
  2138. data/tests/wellformed/atom10/feed_id.xml +7 -0
  2139. data/tests/wellformed/atom10/feed_id_map_guid.xml +7 -0
  2140. data/tests/wellformed/atom10/feed_link_alternate_map_link.xml +7 -0
  2141. data/tests/wellformed/atom10/feed_link_alternate_map_link_2.xml +7 -0
  2142. data/tests/wellformed/atom10/feed_link_href.xml +7 -0
  2143. data/tests/wellformed/atom10/feed_link_hreflang.xml +7 -0
  2144. data/tests/wellformed/atom10/feed_link_length.xml +7 -0
  2145. data/tests/wellformed/atom10/feed_link_multiple.xml +8 -0
  2146. data/tests/wellformed/atom10/feed_link_no_rel.xml +7 -0
  2147. data/tests/wellformed/atom10/feed_link_rel.xml +7 -0
  2148. data/tests/wellformed/atom10/feed_link_rel_other.xml +7 -0
  2149. data/tests/wellformed/atom10/feed_link_rel_related.xml +7 -0
  2150. data/tests/wellformed/atom10/feed_link_rel_self.xml +7 -0
  2151. data/tests/wellformed/atom10/feed_link_rel_via.xml +7 -0
  2152. data/tests/wellformed/atom10/feed_link_title.xml +7 -0
  2153. data/tests/wellformed/atom10/feed_link_type.xml +7 -0
  2154. data/tests/wellformed/atom10/feed_logo.xml +7 -0
  2155. data/tests/wellformed/atom10/feed_rights.xml +7 -0
  2156. data/tests/wellformed/atom10/feed_rights_base64.xml +9 -0
  2157. data/tests/wellformed/atom10/feed_rights_base64_2.xml +9 -0
  2158. data/tests/wellformed/atom10/feed_rights_content_type.xml +7 -0
  2159. data/tests/wellformed/atom10/feed_rights_content_type_text.xml +7 -0
  2160. data/tests/wellformed/atom10/feed_rights_content_value.xml +7 -0
  2161. data/tests/wellformed/atom10/feed_rights_escaped_markup.xml +7 -0
  2162. data/tests/wellformed/atom10/feed_rights_inline_markup.xml +7 -0
  2163. data/tests/wellformed/atom10/feed_rights_inline_markup_2.xml +7 -0
  2164. data/tests/wellformed/atom10/feed_rights_text_plain.xml +7 -0
  2165. data/tests/wellformed/atom10/feed_subtitle.xml +7 -0
  2166. data/tests/wellformed/atom10/feed_subtitle_base64.xml +9 -0
  2167. data/tests/wellformed/atom10/feed_subtitle_base64_2.xml +9 -0
  2168. data/tests/wellformed/atom10/feed_subtitle_content_type.xml +7 -0
  2169. data/tests/wellformed/atom10/feed_subtitle_content_type_text.xml +7 -0
  2170. data/tests/wellformed/atom10/feed_subtitle_content_value.xml +7 -0
  2171. data/tests/wellformed/atom10/feed_subtitle_escaped_markup.xml +7 -0
  2172. data/tests/wellformed/atom10/feed_subtitle_inline_markup.xml +7 -0
  2173. data/tests/wellformed/atom10/feed_subtitle_inline_markup_2.xml +7 -0
  2174. data/tests/wellformed/atom10/feed_subtitle_text_plain.xml +7 -0
  2175. data/tests/wellformed/atom10/feed_title.xml +7 -0
  2176. data/tests/wellformed/atom10/feed_title_base64.xml +9 -0
  2177. data/tests/wellformed/atom10/feed_title_base64_2.xml +9 -0
  2178. data/tests/wellformed/atom10/feed_title_content_type.xml +7 -0
  2179. data/tests/wellformed/atom10/feed_title_content_type_text.xml +7 -0
  2180. data/tests/wellformed/atom10/feed_title_content_value.xml +7 -0
  2181. data/tests/wellformed/atom10/feed_title_escaped_markup.xml +7 -0
  2182. data/tests/wellformed/atom10/feed_title_inline_markup.xml +7 -0
  2183. data/tests/wellformed/atom10/feed_title_inline_markup_2.xml +7 -0
  2184. data/tests/wellformed/atom10/feed_title_text_plain.xml +7 -0
  2185. data/tests/wellformed/atom10/relative_uri.xml +7 -0
  2186. data/tests/wellformed/atom10/relative_uri_inherit.xml +7 -0
  2187. data/tests/wellformed/atom10/relative_uri_inherit_2.xml +7 -0
  2188. data/tests/wellformed/base/cdf_item_abstract_xml_base.xml +18 -0
  2189. data/tests/wellformed/base/entry_content_xml_base.xml +9 -0
  2190. data/tests/wellformed/base/entry_content_xml_base_inherit.xml +9 -0
  2191. data/tests/wellformed/base/entry_content_xml_base_inherit_2.xml +9 -0
  2192. data/tests/wellformed/base/entry_content_xml_base_inherit_3.xml +10 -0
  2193. data/tests/wellformed/base/entry_content_xml_base_inherit_4.xml +10 -0
  2194. data/tests/wellformed/base/entry_summary_xml_base.xml +9 -0
  2195. data/tests/wellformed/base/entry_summary_xml_base_inherit.xml +9 -0
  2196. data/tests/wellformed/base/entry_summary_xml_base_inherit_2.xml +9 -0
  2197. data/tests/wellformed/base/entry_summary_xml_base_inherit_3.xml +10 -0
  2198. data/tests/wellformed/base/entry_summary_xml_base_inherit_4.xml +10 -0
  2199. data/tests/wellformed/base/entry_title_xml_base.xml +9 -0
  2200. data/tests/wellformed/base/entry_title_xml_base_inherit.xml +9 -0
  2201. data/tests/wellformed/base/entry_title_xml_base_inherit_2.xml +9 -0
  2202. data/tests/wellformed/base/entry_title_xml_base_inherit_3.xml +10 -0
  2203. data/tests/wellformed/base/entry_title_xml_base_inherit_4.xml +10 -0
  2204. data/tests/wellformed/base/feed_copyright_xml_base.xml +7 -0
  2205. data/tests/wellformed/base/feed_copyright_xml_base_inherit.xml +7 -0
  2206. data/tests/wellformed/base/feed_copyright_xml_base_inherit_2.xml +7 -0
  2207. data/tests/wellformed/base/feed_copyright_xml_base_inherit_3.xml +8 -0
  2208. data/tests/wellformed/base/feed_copyright_xml_base_inherit_4.xml +8 -0
  2209. data/tests/wellformed/base/feed_info_xml_base.xml +7 -0
  2210. data/tests/wellformed/base/feed_info_xml_base_inherit.xml +7 -0
  2211. data/tests/wellformed/base/feed_info_xml_base_inherit_2.xml +7 -0
  2212. data/tests/wellformed/base/feed_info_xml_base_inherit_3.xml +8 -0
  2213. data/tests/wellformed/base/feed_info_xml_base_inherit_4.xml +8 -0
  2214. data/tests/wellformed/base/feed_tagline_xml_base.xml +7 -0
  2215. data/tests/wellformed/base/feed_tagline_xml_base_inherit.xml +7 -0
  2216. data/tests/wellformed/base/feed_tagline_xml_base_inherit_2.xml +7 -0
  2217. data/tests/wellformed/base/feed_tagline_xml_base_inherit_3.xml +8 -0
  2218. data/tests/wellformed/base/feed_tagline_xml_base_inherit_4.xml +8 -0
  2219. data/tests/wellformed/base/feed_title_xml_base.xml +7 -0
  2220. data/tests/wellformed/base/feed_title_xml_base_inherit.xml +7 -0
  2221. data/tests/wellformed/base/feed_title_xml_base_inherit_2.xml +7 -0
  2222. data/tests/wellformed/base/feed_title_xml_base_inherit_3.xml +8 -0
  2223. data/tests/wellformed/base/feed_title_xml_base_inherit_4.xml +8 -0
  2224. data/tests/wellformed/base/http_channel_docs_base_content_location.xml +10 -0
  2225. data/tests/wellformed/base/http_channel_docs_base_docuri.xml +9 -0
  2226. data/tests/wellformed/base/http_channel_link_base_content_location.xml +10 -0
  2227. data/tests/wellformed/base/http_channel_link_base_docuri.xml +9 -0
  2228. data/tests/wellformed/base/http_entry_author_url_base_content_location.xml +12 -0
  2229. data/tests/wellformed/base/http_entry_author_url_base_docuri.xml +11 -0
  2230. data/tests/wellformed/base/http_entry_content_base64_base_content_location.xml +12 -0
  2231. data/tests/wellformed/base/http_entry_content_base64_base_docuri.xml +11 -0
  2232. data/tests/wellformed/base/http_entry_content_base_content_location.xml +10 -0
  2233. data/tests/wellformed/base/http_entry_content_base_docuri.xml +9 -0
  2234. data/tests/wellformed/base/http_entry_content_inline_base_content_location.xml +10 -0
  2235. data/tests/wellformed/base/http_entry_content_inline_base_docuri.xml +9 -0
  2236. data/tests/wellformed/base/http_entry_contributor_url_base_content_location.xml +12 -0
  2237. data/tests/wellformed/base/http_entry_contributor_url_base_docuri.xml +11 -0
  2238. data/tests/wellformed/base/http_entry_id_base_content_location.xml +10 -0
  2239. data/tests/wellformed/base/http_entry_id_base_docuri.xml +9 -0
  2240. data/tests/wellformed/base/http_entry_link_base_content_location.xml +10 -0
  2241. data/tests/wellformed/base/http_entry_link_base_docuri.xml +9 -0
  2242. data/tests/wellformed/base/http_entry_summary_base64_base_content_location.xml +12 -0
  2243. data/tests/wellformed/base/http_entry_summary_base64_base_docuri.xml +11 -0
  2244. data/tests/wellformed/base/http_entry_summary_base_content_location.xml +10 -0
  2245. data/tests/wellformed/base/http_entry_summary_base_docuri.xml +9 -0
  2246. data/tests/wellformed/base/http_entry_summary_inline_base_content_location.xml +10 -0
  2247. data/tests/wellformed/base/http_entry_summary_inline_base_docuri.xml +9 -0
  2248. data/tests/wellformed/base/http_entry_title_base64_base_content_location.xml +12 -0
  2249. data/tests/wellformed/base/http_entry_title_base64_base_docuri.xml +11 -0
  2250. data/tests/wellformed/base/http_entry_title_base_content_location.xml +10 -0
  2251. data/tests/wellformed/base/http_entry_title_base_docuri.xml +9 -0
  2252. data/tests/wellformed/base/http_entry_title_inline_base_content_location.xml +10 -0
  2253. data/tests/wellformed/base/http_entry_title_inline_base_docuri.xml +9 -0
  2254. data/tests/wellformed/base/http_feed_author_url_base_content_location.xml +10 -0
  2255. data/tests/wellformed/base/http_feed_author_url_base_docuri.xml +9 -0
  2256. data/tests/wellformed/base/http_feed_contributor_url_base_content_location.xml +10 -0
  2257. data/tests/wellformed/base/http_feed_contributor_url_base_docuri.xml +9 -0
  2258. data/tests/wellformed/base/http_feed_copyright_base64_base_content_location.xml +10 -0
  2259. data/tests/wellformed/base/http_feed_copyright_base64_base_docuri.xml +9 -0
  2260. data/tests/wellformed/base/http_feed_copyright_base_content_location.xml +8 -0
  2261. data/tests/wellformed/base/http_feed_copyright_base_docuri.xml +7 -0
  2262. data/tests/wellformed/base/http_feed_copyright_inline_base_content_location.xml +8 -0
  2263. data/tests/wellformed/base/http_feed_copyright_inline_base_docuri.xml +7 -0
  2264. data/tests/wellformed/base/http_feed_generator_url_base_content_location.xml +8 -0
  2265. data/tests/wellformed/base/http_feed_generator_url_base_docuri.xml +7 -0
  2266. data/tests/wellformed/base/http_feed_id_base_content_location.xml +8 -0
  2267. data/tests/wellformed/base/http_feed_id_base_docuri.xml +7 -0
  2268. data/tests/wellformed/base/http_feed_info_base64_base_content_location.xml +10 -0
  2269. data/tests/wellformed/base/http_feed_info_base64_base_docuri.xml +9 -0
  2270. data/tests/wellformed/base/http_feed_info_base_content_location.xml +8 -0
  2271. data/tests/wellformed/base/http_feed_info_base_docuri.xml +7 -0
  2272. data/tests/wellformed/base/http_feed_info_inline_base_content_location.xml +8 -0
  2273. data/tests/wellformed/base/http_feed_info_inline_base_docuri.xml +7 -0
  2274. data/tests/wellformed/base/http_feed_link_base_content_location.xml +8 -0
  2275. data/tests/wellformed/base/http_feed_link_base_docuri.xml +7 -0
  2276. data/tests/wellformed/base/http_feed_tagline_base64_base_content_location.xml +10 -0
  2277. data/tests/wellformed/base/http_feed_tagline_base64_base_docuri.xml +9 -0
  2278. data/tests/wellformed/base/http_feed_tagline_base_content_location.xml +8 -0
  2279. data/tests/wellformed/base/http_feed_tagline_base_docuri.xml +7 -0
  2280. data/tests/wellformed/base/http_feed_tagline_inline_base_content_location.xml +8 -0
  2281. data/tests/wellformed/base/http_feed_tagline_inline_base_docuri.xml +7 -0
  2282. data/tests/wellformed/base/http_feed_title_base64_base_content_location.xml +10 -0
  2283. data/tests/wellformed/base/http_feed_title_base64_base_docuri.xml +9 -0
  2284. data/tests/wellformed/base/http_feed_title_base_content_location.xml +8 -0
  2285. data/tests/wellformed/base/http_feed_title_base_docuri.xml +7 -0
  2286. data/tests/wellformed/base/http_feed_title_inline_base_content_location.xml +8 -0
  2287. data/tests/wellformed/base/http_feed_title_inline_base_docuri.xml +7 -0
  2288. data/tests/wellformed/base/http_item_body_base_content_location.xml +12 -0
  2289. data/tests/wellformed/base/http_item_body_base_docuri.xml +11 -0
  2290. data/tests/wellformed/base/http_item_comments_base_content_location.xml +12 -0
  2291. data/tests/wellformed/base/http_item_comments_base_docuri.xml +11 -0
  2292. data/tests/wellformed/base/http_item_content_encoded_base_content_location.xml +12 -0
  2293. data/tests/wellformed/base/http_item_content_encoded_base_docuri.xml +11 -0
  2294. data/tests/wellformed/base/http_item_description_base_content_location.xml +12 -0
  2295. data/tests/wellformed/base/http_item_description_base_docuri.xml +11 -0
  2296. data/tests/wellformed/base/http_item_fullitem_base_content_location.xml +12 -0
  2297. data/tests/wellformed/base/http_item_fullitem_base_docuri.xml +11 -0
  2298. data/tests/wellformed/base/http_item_link_base_content_location.xml +12 -0
  2299. data/tests/wellformed/base/http_item_link_base_docuri.xml +11 -0
  2300. data/tests/wellformed/base/http_item_wfw_commentRSS_base_content_location.xml +12 -0
  2301. data/tests/wellformed/base/http_item_wfw_commentRSS_base_docuri.xml +11 -0
  2302. data/tests/wellformed/base/http_item_wfw_comment_base_content_location.xml +12 -0
  2303. data/tests/wellformed/base/http_item_wfw_comment_base_docuri.xml +11 -0
  2304. data/tests/wellformed/base/http_item_xhtml_body_base_content_location.xml +12 -0
  2305. data/tests/wellformed/base/http_item_xhtml_body_base_docuri.xml +11 -0
  2306. data/tests/wellformed/base/http_relative_xml_base.xml +10 -0
  2307. data/tests/wellformed/base/http_relative_xml_base_2.xml +10 -0
  2308. data/tests/wellformed/base/malformed_base.xml +9 -0
  2309. data/tests/wellformed/base/relative_xml_base.xml +9 -0
  2310. data/tests/wellformed/base/relative_xml_base_2.xml +9 -0
  2311. data/tests/wellformed/cdf/channel_abstract_map_description.xml +7 -0
  2312. data/tests/wellformed/cdf/channel_abstract_map_tagline.xml +7 -0
  2313. data/tests/wellformed/cdf/channel_href_map_link.xml +6 -0
  2314. data/tests/wellformed/cdf/channel_href_map_links.xml +6 -0
  2315. data/tests/wellformed/cdf/channel_title.xml +7 -0
  2316. data/tests/wellformed/cdf/item_abstract_map_description.xml +9 -0
  2317. data/tests/wellformed/cdf/item_abstract_map_summary.xml +9 -0
  2318. data/tests/wellformed/cdf/item_href_map_link.xml +8 -0
  2319. data/tests/wellformed/cdf/item_href_map_links.xml +8 -0
  2320. data/tests/wellformed/cdf/item_title.xml +9 -0
  2321. data/tests/wellformed/date/cdf_channel_lastmod_map_date.xml +6 -0
  2322. data/tests/wellformed/date/cdf_channel_lastmod_map_modified.xml +6 -0
  2323. data/tests/wellformed/date/cdf_channel_lastmod_map_modified_parsed.xml +6 -0
  2324. data/tests/wellformed/date/cdf_item_lastmod_map_date.xml +8 -0
  2325. data/tests/wellformed/date/cdf_item_lastmod_map_modified.xml +8 -0
  2326. data/tests/wellformed/date/cdf_item_lastmod_map_modified_parsed.xml +8 -0
  2327. data/tests/wellformed/date/channel_dc_date.xml +9 -0
  2328. data/tests/wellformed/date/channel_dc_date_map_modified.xml +9 -0
  2329. data/tests/wellformed/date/channel_dc_date_w3dtf_utc.xml +9 -0
  2330. data/tests/wellformed/date/channel_dc_date_w3dtf_utc_map_modified_parsed.xml +9 -0
  2331. data/tests/wellformed/date/channel_dcterms_created.xml +9 -0
  2332. data/tests/wellformed/date/channel_dcterms_created_w3dtf_utc.xml +9 -0
  2333. data/tests/wellformed/date/channel_dcterms_issued.xml +9 -0
  2334. data/tests/wellformed/date/channel_dcterms_issued_w3dtf_utc.xml +9 -0
  2335. data/tests/wellformed/date/channel_dcterms_modified.xml +9 -0
  2336. data/tests/wellformed/date/channel_dcterms_modified_map_date.xml +9 -0
  2337. data/tests/wellformed/date/channel_dcterms_modified_w3dtf_utc.xml +9 -0
  2338. data/tests/wellformed/date/channel_dcterms_modified_w3dtf_utc_map_date.xml +9 -0
  2339. data/tests/wellformed/date/channel_pubDate.xml +9 -0
  2340. data/tests/wellformed/date/channel_pubDate_asctime.xml +9 -0
  2341. data/tests/wellformed/date/channel_pubDate_disney.xml +9 -0
  2342. data/tests/wellformed/date/channel_pubDate_disney_at.xml +9 -0
  2343. data/tests/wellformed/date/channel_pubDate_disney_ct.xml +9 -0
  2344. data/tests/wellformed/date/channel_pubDate_disney_mt.xml +9 -0
  2345. data/tests/wellformed/date/channel_pubDate_disney_pt.xml +9 -0
  2346. data/tests/wellformed/date/channel_pubDate_greek_1.xml +9 -0
  2347. data/tests/wellformed/date/channel_pubDate_hungarian_1.xml +9 -0
  2348. data/tests/wellformed/date/channel_pubDate_iso8601_ym.xml +9 -0
  2349. data/tests/wellformed/date/channel_pubDate_iso8601_ym_2.xml +9 -0
  2350. data/tests/wellformed/date/channel_pubDate_iso8601_ymd.xml +9 -0
  2351. data/tests/wellformed/date/channel_pubDate_iso8601_ymd_2.xml +9 -0
  2352. data/tests/wellformed/date/channel_pubDate_iso8601_yo_2.xml +9 -0
  2353. data/tests/wellformed/date/channel_pubDate_korean_nate.xml +11 -0
  2354. data/tests/wellformed/date/channel_pubDate_map_modified.xml +9 -0
  2355. data/tests/wellformed/date/channel_pubDate_mssql.xml +9 -0
  2356. data/tests/wellformed/date/channel_pubDate_mssql_nofraction.xml +9 -0
  2357. data/tests/wellformed/date/channel_pubDate_nosecond.xml +9 -0
  2358. data/tests/wellformed/date/channel_pubDate_notime.xml +9 -0
  2359. data/tests/wellformed/date/channel_pubDate_rfc2822.xml +9 -0
  2360. data/tests/wellformed/date/channel_pubDate_rfc2822_rollover_june_31.xml +9 -0
  2361. data/tests/wellformed/date/channel_pubDate_rfc822.xml +9 -0
  2362. data/tests/wellformed/date/channel_pubDate_w3dtf_rollover_25h.xml +9 -0
  2363. data/tests/wellformed/date/channel_pubDate_w3dtf_rollover_61m.xml +9 -0
  2364. data/tests/wellformed/date/channel_pubDate_w3dtf_rollover_61s.xml +9 -0
  2365. data/tests/wellformed/date/channel_pubDate_w3dtf_rollover_leapyear.xml +9 -0
  2366. data/tests/wellformed/date/channel_pubDate_w3dtf_rollover_leapyear400.xml +9 -0
  2367. data/tests/wellformed/date/channel_pubDate_w3dtf_rollover_nonleapyear.xml +9 -0
  2368. data/tests/wellformed/date/channel_pubDate_w3dtf_sf.xml +9 -0
  2369. data/tests/wellformed/date/channel_pubDate_w3dtf_tokyo.xml +9 -0
  2370. data/tests/wellformed/date/channel_pubDate_w3dtf_utc.xml +9 -0
  2371. data/tests/wellformed/date/channel_pubDate_w3dtf_y.xml +9 -0
  2372. data/tests/wellformed/date/channel_pubDate_w3dtf_ym.xml +9 -0
  2373. data/tests/wellformed/date/channel_pubDate_w3dtf_ymd.xml +9 -0
  2374. data/tests/wellformed/date/channel_pubDate_w3dtf_ymd_2.xml +9 -0
  2375. data/tests/wellformed/date/entry_created.xml +9 -0
  2376. data/tests/wellformed/date/entry_created_w3dtf_utc.xml +9 -0
  2377. data/tests/wellformed/date/entry_issued.xml +9 -0
  2378. data/tests/wellformed/date/entry_issued_w3dtf_utc.xml +9 -0
  2379. data/tests/wellformed/date/entry_modified.xml +9 -0
  2380. data/tests/wellformed/date/entry_modified_map_date.xml +9 -0
  2381. data/tests/wellformed/date/entry_modified_w3dtf_utc.xml +9 -0
  2382. data/tests/wellformed/date/entry_published_w3dtf_utc.xml +9 -0
  2383. data/tests/wellformed/date/entry_source_updated_w3dtf_utc.xml +11 -0
  2384. data/tests/wellformed/date/entry_updated_w3dtf_utc.xml +9 -0
  2385. data/tests/wellformed/date/feed_modified.xml +9 -0
  2386. data/tests/wellformed/date/feed_modified_asctime.xml +9 -0
  2387. data/tests/wellformed/date/feed_modified_disney.xml +7 -0
  2388. data/tests/wellformed/date/feed_modified_disney_at.xml +7 -0
  2389. data/tests/wellformed/date/feed_modified_disney_ct.xml +7 -0
  2390. data/tests/wellformed/date/feed_modified_disney_mt.xml +7 -0
  2391. data/tests/wellformed/date/feed_modified_disney_pt.xml +7 -0
  2392. data/tests/wellformed/date/feed_modified_iso8601_ym.xml +9 -0
  2393. data/tests/wellformed/date/feed_modified_iso8601_ym_2.xml +9 -0
  2394. data/tests/wellformed/date/feed_modified_iso8601_ymd.xml +9 -0
  2395. data/tests/wellformed/date/feed_modified_iso8601_ymd_2.xml +9 -0
  2396. data/tests/wellformed/date/feed_modified_iso8601_yo_2.xml +9 -0
  2397. data/tests/wellformed/date/feed_modified_map_date.xml +9 -0
  2398. data/tests/wellformed/date/feed_modified_rfc2822.xml +9 -0
  2399. data/tests/wellformed/date/feed_modified_rfc2822_rollover_june_31.xml +9 -0
  2400. data/tests/wellformed/date/feed_modified_rfc822.xml +9 -0
  2401. data/tests/wellformed/date/feed_modified_w3dtf_rollover_leapyear.xml +9 -0
  2402. data/tests/wellformed/date/feed_modified_w3dtf_rollover_leapyear400.xml +9 -0
  2403. data/tests/wellformed/date/feed_modified_w3dtf_rollover_nonleapyear.xml +9 -0
  2404. data/tests/wellformed/date/feed_modified_w3dtf_sf.xml +9 -0
  2405. data/tests/wellformed/date/feed_modified_w3dtf_tokyo.xml +9 -0
  2406. data/tests/wellformed/date/feed_modified_w3dtf_utc.xml +9 -0
  2407. data/tests/wellformed/date/feed_modified_w3dtf_y.xml +9 -0
  2408. data/tests/wellformed/date/feed_modified_w3dtf_ym.xml +9 -0
  2409. data/tests/wellformed/date/feed_modified_w3dtf_ymd.xml +9 -0
  2410. data/tests/wellformed/date/feed_modified_w3dtf_ymd_2.xml +9 -0
  2411. data/tests/wellformed/date/feed_updated_w3dtf_utc.xml +7 -0
  2412. data/tests/wellformed/date/item_dc_date.xml +11 -0
  2413. data/tests/wellformed/date/item_dc_date_map_modified.xml +11 -0
  2414. data/tests/wellformed/date/item_dc_date_w3dtf_utc.xml +11 -0
  2415. data/tests/wellformed/date/item_dc_date_w3dtf_utc_map_modified_parsed.xml +11 -0
  2416. data/tests/wellformed/date/item_dcterms_created.xml +11 -0
  2417. data/tests/wellformed/date/item_dcterms_created_w3dtf_utc.xml +11 -0
  2418. data/tests/wellformed/date/item_dcterms_issued.xml +11 -0
  2419. data/tests/wellformed/date/item_dcterms_issued_w3dtf_utc.xml +11 -0
  2420. data/tests/wellformed/date/item_dcterms_modified.xml +11 -0
  2421. data/tests/wellformed/date/item_dcterms_modified_map_date.xml +11 -0
  2422. data/tests/wellformed/date/item_dcterms_modified_w3dtf_utc.xml +11 -0
  2423. data/tests/wellformed/date/item_dcterms_modified_w3dtf_utc_map_date.xml +11 -0
  2424. data/tests/wellformed/date/item_expirationDate.xml +11 -0
  2425. data/tests/wellformed/date/item_expirationDate_rfc2822.xml +11 -0
  2426. data/tests/wellformed/date/item_pubDate.xml +11 -0
  2427. data/tests/wellformed/date/item_pubDate_euc-kr.xml +13 -0
  2428. data/tests/wellformed/date/item_pubDate_map_modified.xml +11 -0
  2429. data/tests/wellformed/date/item_pubDate_rfc2822.xml +11 -0
  2430. data/tests/wellformed/encoding/big5.xml +8 -0
  2431. data/tests/wellformed/encoding/csucs4.xml +0 -0
  2432. data/tests/wellformed/encoding/csunicode.xml +0 -0
  2433. data/tests/wellformed/encoding/encoding_attribute_crash.xml +9 -0
  2434. data/tests/wellformed/encoding/encoding_attribute_crash_2.xml +9 -0
  2435. data/tests/wellformed/encoding/euc-kr-attribute.xml +14 -0
  2436. data/tests/wellformed/encoding/euc-kr-item.xml +14 -0
  2437. data/tests/wellformed/encoding/euc-kr.xml +12 -0
  2438. data/tests/wellformed/encoding/http_application_atom_xml_charset.xml +8 -0
  2439. data/tests/wellformed/encoding/http_application_atom_xml_charset_overrides_encoding.xml +8 -0
  2440. data/tests/wellformed/encoding/http_application_atom_xml_default.xml +8 -0
  2441. data/tests/wellformed/encoding/http_application_atom_xml_encoding.xml +8 -0
  2442. data/tests/wellformed/encoding/http_application_rss_xml_charset.xml +8 -0
  2443. data/tests/wellformed/encoding/http_application_rss_xml_charset_overrides_encoding.xml +8 -0
  2444. data/tests/wellformed/encoding/http_application_rss_xml_default.xml +8 -0
  2445. data/tests/wellformed/encoding/http_application_rss_xml_encoding.xml +8 -0
  2446. data/tests/wellformed/encoding/http_application_xml_charset.xml +8 -0
  2447. data/tests/wellformed/encoding/http_application_xml_charset_overrides_encoding.xml +8 -0
  2448. data/tests/wellformed/encoding/http_application_xml_default.xml +8 -0
  2449. data/tests/wellformed/encoding/http_application_xml_dtd_charset.xml +8 -0
  2450. data/tests/wellformed/encoding/http_application_xml_dtd_charset_overrides_encoding.xml +8 -0
  2451. data/tests/wellformed/encoding/http_application_xml_dtd_default.xml +8 -0
  2452. data/tests/wellformed/encoding/http_application_xml_dtd_encoding.xml +8 -0
  2453. data/tests/wellformed/encoding/http_application_xml_encoding.xml +8 -0
  2454. data/tests/wellformed/encoding/http_application_xml_epe_charset.xml +8 -0
  2455. data/tests/wellformed/encoding/http_application_xml_epe_charset_overrides_encoding.xml +8 -0
  2456. data/tests/wellformed/encoding/http_application_xml_epe_default.xml +8 -0
  2457. data/tests/wellformed/encoding/http_application_xml_epe_encoding.xml +8 -0
  2458. data/tests/wellformed/encoding/http_encoding_attribute_crash.xml +13 -0
  2459. data/tests/wellformed/encoding/http_i18n.xml +13 -0
  2460. data/tests/wellformed/encoding/http_text_atom_xml_charset.xml +8 -0
  2461. data/tests/wellformed/encoding/http_text_atom_xml_charset_overrides_encoding.xml +8 -0
  2462. data/tests/wellformed/encoding/http_text_atom_xml_default.xml +8 -0
  2463. data/tests/wellformed/encoding/http_text_atom_xml_encoding.xml +8 -0
  2464. data/tests/wellformed/encoding/http_text_rss_xml_charset.xml +8 -0
  2465. data/tests/wellformed/encoding/http_text_rss_xml_charset_overrides_encoding.xml +8 -0
  2466. data/tests/wellformed/encoding/http_text_rss_xml_default.xml +8 -0
  2467. data/tests/wellformed/encoding/http_text_rss_xml_encoding.xml +8 -0
  2468. data/tests/wellformed/encoding/http_text_xml_bogus_charset.xml +8 -0
  2469. data/tests/wellformed/encoding/http_text_xml_bogus_param.xml +8 -0
  2470. data/tests/wellformed/encoding/http_text_xml_charset.xml +8 -0
  2471. data/tests/wellformed/encoding/http_text_xml_charset_2.xml +16 -0
  2472. data/tests/wellformed/encoding/http_text_xml_charset_overrides_encoding.xml +8 -0
  2473. data/tests/wellformed/encoding/http_text_xml_charset_overrides_encoding_2.xml +17 -0
  2474. data/tests/wellformed/encoding/http_text_xml_default.xml +8 -0
  2475. data/tests/wellformed/encoding/http_text_xml_epe_charset.xml +8 -0
  2476. data/tests/wellformed/encoding/http_text_xml_epe_charset_overrides_encoding.xml +8 -0
  2477. data/tests/wellformed/encoding/http_text_xml_epe_default.xml +8 -0
  2478. data/tests/wellformed/encoding/http_text_xml_epe_encoding.xml +8 -0
  2479. data/tests/wellformed/encoding/http_text_xml_qs.xml +8 -0
  2480. data/tests/wellformed/encoding/iso-10646-ucs-2.xml +0 -0
  2481. data/tests/wellformed/encoding/iso-10646-ucs-4.xml +0 -0
  2482. data/tests/wellformed/encoding/no_content_type_default.xml +7 -0
  2483. data/tests/wellformed/encoding/no_content_type_encoding.xml +7 -0
  2484. data/tests/wellformed/encoding/u16.xml +0 -0
  2485. data/tests/wellformed/encoding/ucs-2.xml +0 -0
  2486. data/tests/wellformed/encoding/ucs-4.xml +0 -0
  2487. data/tests/wellformed/encoding/utf-16be-autodetect.xml +0 -0
  2488. data/tests/wellformed/encoding/utf-16be-bom.xml +0 -0
  2489. data/tests/wellformed/encoding/utf-16be.xml +0 -0
  2490. data/tests/wellformed/encoding/utf-16le-autodetect.xml +0 -0
  2491. data/tests/wellformed/encoding/utf-16le-bom.xml +0 -0
  2492. data/tests/wellformed/encoding/utf-16le.xml +0 -0
  2493. data/tests/wellformed/encoding/utf-32be-autodetect.xml +0 -0
  2494. data/tests/wellformed/encoding/utf-32be-bom.xml +0 -0
  2495. data/tests/wellformed/encoding/utf-32be.xml +0 -0
  2496. data/tests/wellformed/encoding/utf-32le-autodetect.xml +0 -0
  2497. data/tests/wellformed/encoding/utf-32le-bom.xml +0 -0
  2498. data/tests/wellformed/encoding/utf-32le.xml +0 -0
  2499. data/tests/wellformed/encoding/utf-8-bom.xml +8 -0
  2500. data/tests/wellformed/encoding/utf16.xml +0 -0
  2501. data/tests/wellformed/encoding/utf_16.xml +0 -0
  2502. data/tests/wellformed/encoding/utf_32.xml +0 -0
  2503. data/tests/wellformed/encoding/x80_437.xml +9 -0
  2504. data/tests/wellformed/encoding/x80_850.xml +9 -0
  2505. data/tests/wellformed/encoding/x80_852.xml +9 -0
  2506. data/tests/wellformed/encoding/x80_855.xml +9 -0
  2507. data/tests/wellformed/encoding/x80_857.xml +9 -0
  2508. data/tests/wellformed/encoding/x80_860.xml +9 -0
  2509. data/tests/wellformed/encoding/x80_861.xml +9 -0
  2510. data/tests/wellformed/encoding/x80_862.xml +9 -0
  2511. data/tests/wellformed/encoding/x80_863.xml +9 -0
  2512. data/tests/wellformed/encoding/x80_865.xml +9 -0
  2513. data/tests/wellformed/encoding/x80_866.xml +9 -0
  2514. data/tests/wellformed/encoding/x80_cp037.xml +1 -0
  2515. data/tests/wellformed/encoding/x80_cp1125.xml +9 -0
  2516. data/tests/wellformed/encoding/x80_cp1250.xml +9 -0
  2517. data/tests/wellformed/encoding/x80_cp1251.xml +9 -0
  2518. data/tests/wellformed/encoding/x80_cp1252.xml +9 -0
  2519. data/tests/wellformed/encoding/x80_cp1253.xml +9 -0
  2520. data/tests/wellformed/encoding/x80_cp1254.xml +9 -0
  2521. data/tests/wellformed/encoding/x80_cp1255.xml +9 -0
  2522. data/tests/wellformed/encoding/x80_cp1256.xml +9 -0
  2523. data/tests/wellformed/encoding/x80_cp1257.xml +9 -0
  2524. data/tests/wellformed/encoding/x80_cp1258.xml +9 -0
  2525. data/tests/wellformed/encoding/x80_cp437.xml +9 -0
  2526. data/tests/wellformed/encoding/x80_cp500.xml +1 -0
  2527. data/tests/wellformed/encoding/x80_cp737.xml +9 -0
  2528. data/tests/wellformed/encoding/x80_cp775.xml +9 -0
  2529. data/tests/wellformed/encoding/x80_cp850.xml +9 -0
  2530. data/tests/wellformed/encoding/x80_cp852.xml +9 -0
  2531. data/tests/wellformed/encoding/x80_cp855.xml +9 -0
  2532. data/tests/wellformed/encoding/x80_cp856.xml +9 -0
  2533. data/tests/wellformed/encoding/x80_cp857.xml +9 -0
  2534. data/tests/wellformed/encoding/x80_cp860.xml +9 -0
  2535. data/tests/wellformed/encoding/x80_cp861.xml +9 -0
  2536. data/tests/wellformed/encoding/x80_cp862.xml +9 -0
  2537. data/tests/wellformed/encoding/x80_cp863.xml +9 -0
  2538. data/tests/wellformed/encoding/x80_cp864.xml +9 -0
  2539. data/tests/wellformed/encoding/x80_cp865.xml +9 -0
  2540. data/tests/wellformed/encoding/x80_cp866.xml +9 -0
  2541. data/tests/wellformed/encoding/x80_cp874.xml +9 -0
  2542. data/tests/wellformed/encoding/x80_cp875.xml +1 -0
  2543. data/tests/wellformed/encoding/x80_cp_is.xml +9 -0
  2544. data/tests/wellformed/encoding/x80_csibm037.xml +1 -0
  2545. data/tests/wellformed/encoding/x80_csibm500.xml +1 -0
  2546. data/tests/wellformed/encoding/x80_csibm855.xml +9 -0
  2547. data/tests/wellformed/encoding/x80_csibm857.xml +9 -0
  2548. data/tests/wellformed/encoding/x80_csibm860.xml +9 -0
  2549. data/tests/wellformed/encoding/x80_csibm861.xml +9 -0
  2550. data/tests/wellformed/encoding/x80_csibm863.xml +9 -0
  2551. data/tests/wellformed/encoding/x80_csibm864.xml +9 -0
  2552. data/tests/wellformed/encoding/x80_csibm865.xml +9 -0
  2553. data/tests/wellformed/encoding/x80_csibm866.xml +9 -0
  2554. data/tests/wellformed/encoding/x80_cskoi8r.xml +9 -0
  2555. data/tests/wellformed/encoding/x80_csmacintosh.xml +9 -0
  2556. data/tests/wellformed/encoding/x80_cspc775baltic.xml +9 -0
  2557. data/tests/wellformed/encoding/x80_cspc850multilingual.xml +9 -0
  2558. data/tests/wellformed/encoding/x80_cspc862latinhebrew.xml +9 -0
  2559. data/tests/wellformed/encoding/x80_cspc8codepage437.xml +9 -0
  2560. data/tests/wellformed/encoding/x80_cspcp852.xml +9 -0
  2561. data/tests/wellformed/encoding/x80_dbcs.xml +9 -0
  2562. data/tests/wellformed/encoding/x80_ebcdic-cp-be.xml +1 -0
  2563. data/tests/wellformed/encoding/x80_ebcdic-cp-ca.xml +1 -0
  2564. data/tests/wellformed/encoding/x80_ebcdic-cp-ch.xml +1 -0
  2565. data/tests/wellformed/encoding/x80_ebcdic-cp-nl.xml +1 -0
  2566. data/tests/wellformed/encoding/x80_ebcdic-cp-us.xml +1 -0
  2567. data/tests/wellformed/encoding/x80_ebcdic-cp-wt.xml +1 -0
  2568. data/tests/wellformed/encoding/x80_ebcdic_cp_be.xml +1 -0
  2569. data/tests/wellformed/encoding/x80_ebcdic_cp_ca.xml +1 -0
  2570. data/tests/wellformed/encoding/x80_ebcdic_cp_ch.xml +1 -0
  2571. data/tests/wellformed/encoding/x80_ebcdic_cp_nl.xml +1 -0
  2572. data/tests/wellformed/encoding/x80_ebcdic_cp_us.xml +1 -0
  2573. data/tests/wellformed/encoding/x80_ebcdic_cp_wt.xml +1 -0
  2574. data/tests/wellformed/encoding/x80_ibm037.xml +1 -0
  2575. data/tests/wellformed/encoding/x80_ibm039.xml +1 -0
  2576. data/tests/wellformed/encoding/x80_ibm1140.xml +1 -0
  2577. data/tests/wellformed/encoding/x80_ibm437.xml +9 -0
  2578. data/tests/wellformed/encoding/x80_ibm500.xml +1 -0
  2579. data/tests/wellformed/encoding/x80_ibm775.xml +9 -0
  2580. data/tests/wellformed/encoding/x80_ibm850.xml +9 -0
  2581. data/tests/wellformed/encoding/x80_ibm852.xml +9 -0
  2582. data/tests/wellformed/encoding/x80_ibm855.xml +9 -0
  2583. data/tests/wellformed/encoding/x80_ibm857.xml +9 -0
  2584. data/tests/wellformed/encoding/x80_ibm860.xml +9 -0
  2585. data/tests/wellformed/encoding/x80_ibm861.xml +9 -0
  2586. data/tests/wellformed/encoding/x80_ibm862.xml +9 -0
  2587. data/tests/wellformed/encoding/x80_ibm863.xml +9 -0
  2588. data/tests/wellformed/encoding/x80_ibm864.xml +9 -0
  2589. data/tests/wellformed/encoding/x80_ibm865.xml +9 -0
  2590. data/tests/wellformed/encoding/x80_ibm866.xml +9 -0
  2591. data/tests/wellformed/encoding/x80_koi8-r.xml +9 -0
  2592. data/tests/wellformed/encoding/x80_koi8-t.xml +9 -0
  2593. data/tests/wellformed/encoding/x80_koi8-u.xml +9 -0
  2594. data/tests/wellformed/encoding/x80_mac-cyrillic.xml +9 -0
  2595. data/tests/wellformed/encoding/x80_mac.xml +9 -0
  2596. data/tests/wellformed/encoding/x80_maccentraleurope.xml +9 -0
  2597. data/tests/wellformed/encoding/x80_maccyrillic.xml +9 -0
  2598. data/tests/wellformed/encoding/x80_macgreek.xml +9 -0
  2599. data/tests/wellformed/encoding/x80_maciceland.xml +9 -0
  2600. data/tests/wellformed/encoding/x80_macintosh.xml +9 -0
  2601. data/tests/wellformed/encoding/x80_maclatin2.xml +9 -0
  2602. data/tests/wellformed/encoding/x80_macroman.xml +9 -0
  2603. data/tests/wellformed/encoding/x80_macturkish.xml +9 -0
  2604. data/tests/wellformed/encoding/x80_ms-ansi.xml +9 -0
  2605. data/tests/wellformed/encoding/x80_ms-arab.xml +9 -0
  2606. data/tests/wellformed/encoding/x80_ms-cyrl.xml +9 -0
  2607. data/tests/wellformed/encoding/x80_ms-ee.xml +9 -0
  2608. data/tests/wellformed/encoding/x80_ms-greek.xml +9 -0
  2609. data/tests/wellformed/encoding/x80_ms-hebr.xml +9 -0
  2610. data/tests/wellformed/encoding/x80_ms-turk.xml +9 -0
  2611. data/tests/wellformed/encoding/x80_tcvn-5712.xml +9 -0
  2612. data/tests/wellformed/encoding/x80_tcvn.xml +9 -0
  2613. data/tests/wellformed/encoding/x80_tcvn5712-1.xml +9 -0
  2614. data/tests/wellformed/encoding/x80_viscii.xml +9 -0
  2615. data/tests/wellformed/encoding/x80_winbaltrim.xml +9 -0
  2616. data/tests/wellformed/encoding/x80_windows-1250.xml +9 -0
  2617. data/tests/wellformed/encoding/x80_windows-1251.xml +9 -0
  2618. data/tests/wellformed/encoding/x80_windows-1252.xml +9 -0
  2619. data/tests/wellformed/encoding/x80_windows-1253.xml +9 -0
  2620. data/tests/wellformed/encoding/x80_windows-1254.xml +9 -0
  2621. data/tests/wellformed/encoding/x80_windows-1255.xml +9 -0
  2622. data/tests/wellformed/encoding/x80_windows-1256.xml +9 -0
  2623. data/tests/wellformed/encoding/x80_windows-1257.xml +9 -0
  2624. data/tests/wellformed/encoding/x80_windows-1258.xml +9 -0
  2625. data/tests/wellformed/encoding/x80_windows_1250.xml +9 -0
  2626. data/tests/wellformed/encoding/x80_windows_1251.xml +9 -0
  2627. data/tests/wellformed/encoding/x80_windows_1252.xml +9 -0
  2628. data/tests/wellformed/encoding/x80_windows_1253.xml +9 -0
  2629. data/tests/wellformed/encoding/x80_windows_1254.xml +9 -0
  2630. data/tests/wellformed/encoding/x80_windows_1255.xml +9 -0
  2631. data/tests/wellformed/encoding/x80_windows_1256.xml +9 -0
  2632. data/tests/wellformed/encoding/x80_windows_1257.xml +9 -0
  2633. data/tests/wellformed/encoding/x80_windows_1258.xml +9 -0
  2634. data/tests/wellformed/feedburner/feedburner_browserfriendly.xml +9 -0
  2635. data/tests/wellformed/http/headers_foo.xml +7 -0
  2636. data/tests/wellformed/itunes/itunes_channel_block.xml +9 -0
  2637. data/tests/wellformed/itunes/itunes_channel_block_false.xml +9 -0
  2638. data/tests/wellformed/itunes/itunes_channel_block_no.xml +9 -0
  2639. data/tests/wellformed/itunes/itunes_channel_block_true.xml +9 -0
  2640. data/tests/wellformed/itunes/itunes_channel_block_uppercase.xml +9 -0
  2641. data/tests/wellformed/itunes/itunes_channel_block_whitespace.xml +9 -0
  2642. data/tests/wellformed/itunes/itunes_channel_category.xml +9 -0
  2643. data/tests/wellformed/itunes/itunes_channel_category_nested.xml +11 -0
  2644. data/tests/wellformed/itunes/itunes_channel_category_scheme.xml +9 -0
  2645. data/tests/wellformed/itunes/itunes_channel_explicit.xml +9 -0
  2646. data/tests/wellformed/itunes/itunes_channel_explicit_false.xml +9 -0
  2647. data/tests/wellformed/itunes/itunes_channel_explicit_no.xml +9 -0
  2648. data/tests/wellformed/itunes/itunes_channel_explicit_true.xml +9 -0
  2649. data/tests/wellformed/itunes/itunes_channel_explicit_uppercase.xml +9 -0
  2650. data/tests/wellformed/itunes/itunes_channel_explicit_whitespace.xml +9 -0
  2651. data/tests/wellformed/itunes/itunes_channel_image.xml +9 -0
  2652. data/tests/wellformed/itunes/itunes_channel_keywords.xml +9 -0
  2653. data/tests/wellformed/itunes/itunes_channel_keywords_duplicate.xml +9 -0
  2654. data/tests/wellformed/itunes/itunes_channel_keywords_duplicate_2.xml +10 -0
  2655. data/tests/wellformed/itunes/itunes_channel_keywords_multiple.xml +9 -0
  2656. data/tests/wellformed/itunes/itunes_channel_link_image.xml +9 -0
  2657. data/tests/wellformed/itunes/itunes_channel_owner_email.xml +12 -0
  2658. data/tests/wellformed/itunes/itunes_channel_owner_name.xml +12 -0
  2659. data/tests/wellformed/itunes/itunes_channel_subtitle.xml +9 -0
  2660. data/tests/wellformed/itunes/itunes_channel_summary.xml +9 -0
  2661. data/tests/wellformed/itunes/itunes_core_element_uppercase.xml +9 -0
  2662. data/tests/wellformed/itunes/itunes_enclosure_url_maps_id.xml +11 -0
  2663. data/tests/wellformed/itunes/itunes_enclosure_url_maps_id_2.xml +12 -0
  2664. data/tests/wellformed/itunes/itunes_item_author_map_author.xml +11 -0
  2665. data/tests/wellformed/itunes/itunes_item_block.xml +11 -0
  2666. data/tests/wellformed/itunes/itunes_item_block_false.xml +11 -0
  2667. data/tests/wellformed/itunes/itunes_item_block_no.xml +11 -0
  2668. data/tests/wellformed/itunes/itunes_item_block_true.xml +11 -0
  2669. data/tests/wellformed/itunes/itunes_item_block_uppercase.xml +11 -0
  2670. data/tests/wellformed/itunes/itunes_item_block_whitespace.xml +11 -0
  2671. data/tests/wellformed/itunes/itunes_item_category.xml +11 -0
  2672. data/tests/wellformed/itunes/itunes_item_category_nested.xml +13 -0
  2673. data/tests/wellformed/itunes/itunes_item_category_scheme.xml +11 -0
  2674. data/tests/wellformed/itunes/itunes_item_duration.xml +11 -0
  2675. data/tests/wellformed/itunes/itunes_item_explicit.xml +11 -0
  2676. data/tests/wellformed/itunes/itunes_item_explicit_false.xml +11 -0
  2677. data/tests/wellformed/itunes/itunes_item_explicit_no.xml +11 -0
  2678. data/tests/wellformed/itunes/itunes_item_explicit_true.xml +11 -0
  2679. data/tests/wellformed/itunes/itunes_item_explicit_uppercase.xml +11 -0
  2680. data/tests/wellformed/itunes/itunes_item_explicit_whitespace.xml +11 -0
  2681. data/tests/wellformed/itunes/itunes_item_image.xml +11 -0
  2682. data/tests/wellformed/itunes/itunes_item_link_image.xml +11 -0
  2683. data/tests/wellformed/itunes/itunes_item_subtitle.xml +11 -0
  2684. data/tests/wellformed/itunes/itunes_item_summary.xml +11 -0
  2685. data/tests/wellformed/itunes/itunes_link_enclosure_maps_id.xml +9 -0
  2686. data/tests/wellformed/itunes/itunes_link_enclosure_maps_id_2.xml +10 -0
  2687. data/tests/wellformed/itunes/itunes_namespace.xml +9 -0
  2688. data/tests/wellformed/itunes/itunes_namespace_example.xml +9 -0
  2689. data/tests/wellformed/itunes/itunes_namespace_lowercase.xml +9 -0
  2690. data/tests/wellformed/itunes/itunes_namespace_uppercase.xml +9 -0
  2691. data/tests/wellformed/lang/channel_dc_language.xml +9 -0
  2692. data/tests/wellformed/lang/channel_language.xml +9 -0
  2693. data/tests/wellformed/lang/entry_content_xml_lang.xml +9 -0
  2694. data/tests/wellformed/lang/entry_content_xml_lang_blank.xml +9 -0
  2695. data/tests/wellformed/lang/entry_content_xml_lang_blank_2.xml +9 -0
  2696. data/tests/wellformed/lang/entry_content_xml_lang_blank_3.xml +12 -0
  2697. data/tests/wellformed/lang/entry_content_xml_lang_inherit.xml +9 -0
  2698. data/tests/wellformed/lang/entry_content_xml_lang_inherit_2.xml +9 -0
  2699. data/tests/wellformed/lang/entry_content_xml_lang_inherit_3.xml +10 -0
  2700. data/tests/wellformed/lang/entry_content_xml_lang_inherit_4.xml +10 -0
  2701. data/tests/wellformed/lang/entry_summary_xml_lang.xml +9 -0
  2702. data/tests/wellformed/lang/entry_summary_xml_lang_blank.xml +9 -0
  2703. data/tests/wellformed/lang/entry_summary_xml_lang_inherit.xml +9 -0
  2704. data/tests/wellformed/lang/entry_summary_xml_lang_inherit_2.xml +9 -0
  2705. data/tests/wellformed/lang/entry_summary_xml_lang_inherit_3.xml +10 -0
  2706. data/tests/wellformed/lang/entry_summary_xml_lang_inherit_4.xml +10 -0
  2707. data/tests/wellformed/lang/entry_title_xml_lang.xml +9 -0
  2708. data/tests/wellformed/lang/entry_title_xml_lang_blank.xml +9 -0
  2709. data/tests/wellformed/lang/entry_title_xml_lang_inherit.xml +9 -0
  2710. data/tests/wellformed/lang/entry_title_xml_lang_inherit_2.xml +9 -0
  2711. data/tests/wellformed/lang/entry_title_xml_lang_inherit_3.xml +10 -0
  2712. data/tests/wellformed/lang/entry_title_xml_lang_inherit_4.xml +10 -0
  2713. data/tests/wellformed/lang/feed_copyright_xml_lang.xml +7 -0
  2714. data/tests/wellformed/lang/feed_copyright_xml_lang_blank.xml +7 -0
  2715. data/tests/wellformed/lang/feed_copyright_xml_lang_inherit.xml +7 -0
  2716. data/tests/wellformed/lang/feed_copyright_xml_lang_inherit_2.xml +7 -0
  2717. data/tests/wellformed/lang/feed_copyright_xml_lang_inherit_3.xml +8 -0
  2718. data/tests/wellformed/lang/feed_copyright_xml_lang_inherit_4.xml +8 -0
  2719. data/tests/wellformed/lang/feed_info_xml_lang.xml +7 -0
  2720. data/tests/wellformed/lang/feed_info_xml_lang_blank.xml +7 -0
  2721. data/tests/wellformed/lang/feed_info_xml_lang_inherit.xml +7 -0
  2722. data/tests/wellformed/lang/feed_info_xml_lang_inherit_2.xml +7 -0
  2723. data/tests/wellformed/lang/feed_info_xml_lang_inherit_3.xml +8 -0
  2724. data/tests/wellformed/lang/feed_info_xml_lang_inherit_4.xml +8 -0
  2725. data/tests/wellformed/lang/feed_language.xml +9 -0
  2726. data/tests/wellformed/lang/feed_language_override.xml +9 -0
  2727. data/tests/wellformed/lang/feed_not_xml_lang.xml +7 -0
  2728. data/tests/wellformed/lang/feed_not_xml_lang_2.xml +7 -0
  2729. data/tests/wellformed/lang/feed_tagline_xml_lang.xml +7 -0
  2730. data/tests/wellformed/lang/feed_tagline_xml_lang_blank.xml +7 -0
  2731. data/tests/wellformed/lang/feed_tagline_xml_lang_inherit.xml +7 -0
  2732. data/tests/wellformed/lang/feed_tagline_xml_lang_inherit_2.xml +7 -0
  2733. data/tests/wellformed/lang/feed_tagline_xml_lang_inherit_3.xml +8 -0
  2734. data/tests/wellformed/lang/feed_tagline_xml_lang_inherit_4.xml +8 -0
  2735. data/tests/wellformed/lang/feed_title_xml_lang.xml +7 -0
  2736. data/tests/wellformed/lang/feed_title_xml_lang_blank.xml +7 -0
  2737. data/tests/wellformed/lang/feed_title_xml_lang_inherit.xml +7 -0
  2738. data/tests/wellformed/lang/feed_title_xml_lang_inherit_2.xml +7 -0
  2739. data/tests/wellformed/lang/feed_title_xml_lang_inherit_3.xml +8 -0
  2740. data/tests/wellformed/lang/feed_title_xml_lang_inherit_4.xml +8 -0
  2741. data/tests/wellformed/lang/feed_xml_lang.xml +6 -0
  2742. data/tests/wellformed/lang/http_content_language.xml +7 -0
  2743. data/tests/wellformed/lang/http_content_language_entry_title_inherit.xml +10 -0
  2744. data/tests/wellformed/lang/http_content_language_entry_title_inherit_2.xml +11 -0
  2745. data/tests/wellformed/lang/http_content_language_feed_language.xml +10 -0
  2746. data/tests/wellformed/lang/http_content_language_feed_xml_lang.xml +7 -0
  2747. data/tests/wellformed/lang/item_content_encoded_xml_lang.xml +11 -0
  2748. data/tests/wellformed/lang/item_content_encoded_xml_lang_inherit.xml +11 -0
  2749. data/tests/wellformed/lang/item_dc_language.xml +11 -0
  2750. data/tests/wellformed/lang/item_fullitem_xml_lang.xml +11 -0
  2751. data/tests/wellformed/lang/item_fullitem_xml_lang_inherit.xml +11 -0
  2752. data/tests/wellformed/lang/item_xhtml_body_xml_lang.xml +13 -0
  2753. data/tests/wellformed/lang/item_xhtml_body_xml_lang_inherit.xml +13 -0
  2754. data/tests/wellformed/namespace/rss1.0withModules.xml +47 -0
  2755. data/tests/wellformed/namespace/rss1.0withModulesNoDefNS.xml +48 -0
  2756. data/tests/wellformed/namespace/rss1.0withModulesNoDefNSLocalNameClash.xml +53 -0
  2757. data/tests/wellformed/namespace/rss2.0NSwithModules.xml +50 -0
  2758. data/tests/wellformed/namespace/rss2.0NSwithModulesNoDefNS.xml +50 -0
  2759. data/tests/wellformed/namespace/rss2.0NSwithModulesNoDefNSLocalNameClash.xml +58 -0
  2760. data/tests/wellformed/namespace/rss2.0noNSwithModules.xml +49 -0
  2761. data/tests/wellformed/namespace/rss2.0noNSwithModulesLocalNameClash.xml +57 -0
  2762. data/tests/wellformed/rdf/doctype_contains_entity_decl.xml +17 -0
  2763. data/tests/wellformed/rdf/rdf_channel_description.xml +9 -0
  2764. data/tests/wellformed/rdf/rdf_channel_link.xml +9 -0
  2765. data/tests/wellformed/rdf/rdf_channel_title.xml +9 -0
  2766. data/tests/wellformed/rdf/rdf_item_description.xml +16 -0
  2767. data/tests/wellformed/rdf/rdf_item_link.xml +16 -0
  2768. data/tests/wellformed/rdf/rdf_item_rdf_about.xml +15 -0
  2769. data/tests/wellformed/rdf/rdf_item_title.xml +16 -0
  2770. data/tests/wellformed/rdf/rss090_channel_title.xml +12 -0
  2771. data/tests/wellformed/rdf/rss090_item_title.xml +12 -0
  2772. data/tests/wellformed/rdf/rss_version_10.xml +6 -0
  2773. data/tests/wellformed/rdf/rss_version_10_not_default_ns.xml +8 -0
  2774. data/tests/wellformed/rss/aaa_wellformed.xml +6 -0
  2775. data/tests/wellformed/rss/channel_author.xml +9 -0
  2776. data/tests/wellformed/rss/channel_author_map_author_detail_email.xml +9 -0
  2777. data/tests/wellformed/rss/channel_author_map_author_detail_email_2.xml +9 -0
  2778. data/tests/wellformed/rss/channel_author_map_author_detail_email_3.xml +9 -0
  2779. data/tests/wellformed/rss/channel_author_map_author_detail_name.xml +9 -0
  2780. data/tests/wellformed/rss/channel_author_map_author_detail_name_2.xml +9 -0
  2781. data/tests/wellformed/rss/channel_category.xml +9 -0
  2782. data/tests/wellformed/rss/channel_category_domain.xml +9 -0
  2783. data/tests/wellformed/rss/channel_category_multiple.xml +10 -0
  2784. data/tests/wellformed/rss/channel_category_multiple_2.xml +10 -0
  2785. data/tests/wellformed/rss/channel_cloud_domain.xml +9 -0
  2786. data/tests/wellformed/rss/channel_cloud_path.xml +9 -0
  2787. data/tests/wellformed/rss/channel_cloud_port.xml +9 -0
  2788. data/tests/wellformed/rss/channel_cloud_protocol.xml +9 -0
  2789. data/tests/wellformed/rss/channel_cloud_registerProcedure.xml +9 -0
  2790. data/tests/wellformed/rss/channel_copyright.xml +9 -0
  2791. data/tests/wellformed/rss/channel_dc_author.xml +9 -0
  2792. data/tests/wellformed/rss/channel_dc_author_map_author_detail_email.xml +9 -0
  2793. data/tests/wellformed/rss/channel_dc_author_map_author_detail_name.xml +9 -0
  2794. data/tests/wellformed/rss/channel_dc_contributor.xml +9 -0
  2795. data/tests/wellformed/rss/channel_dc_creator.xml +9 -0
  2796. data/tests/wellformed/rss/channel_dc_creator_map_author_detail_email.xml +9 -0
  2797. data/tests/wellformed/rss/channel_dc_creator_map_author_detail_name.xml +9 -0
  2798. data/tests/wellformed/rss/channel_dc_publisher.xml +9 -0
  2799. data/tests/wellformed/rss/channel_dc_publisher_email.xml +9 -0
  2800. data/tests/wellformed/rss/channel_dc_publisher_name.xml +9 -0
  2801. data/tests/wellformed/rss/channel_dc_rights.xml +9 -0
  2802. data/tests/wellformed/rss/channel_dc_subject.xml +9 -0
  2803. data/tests/wellformed/rss/channel_dc_subject_2.xml +9 -0
  2804. data/tests/wellformed/rss/channel_dc_subject_multiple.xml +10 -0
  2805. data/tests/wellformed/rss/channel_dc_title.xml +9 -0
  2806. data/tests/wellformed/rss/channel_description.xml +9 -0
  2807. data/tests/wellformed/rss/channel_description_escaped_markup.xml +9 -0
  2808. data/tests/wellformed/rss/channel_description_map_tagline.xml +9 -0
  2809. data/tests/wellformed/rss/channel_description_naked_markup.xml +9 -0
  2810. data/tests/wellformed/rss/channel_description_shorttag.xml +10 -0
  2811. data/tests/wellformed/rss/channel_docs.xml +9 -0
  2812. data/tests/wellformed/rss/channel_generator.xml +9 -0
  2813. data/tests/wellformed/rss/channel_image_description.xml +16 -0
  2814. data/tests/wellformed/rss/channel_image_height.xml +16 -0
  2815. data/tests/wellformed/rss/channel_image_link.xml +16 -0
  2816. data/tests/wellformed/rss/channel_image_link_conflict.xml +12 -0
  2817. data/tests/wellformed/rss/channel_image_title.xml +16 -0
  2818. data/tests/wellformed/rss/channel_image_title_conflict.xml +12 -0
  2819. data/tests/wellformed/rss/channel_image_url.xml +16 -0
  2820. data/tests/wellformed/rss/channel_image_width.xml +16 -0
  2821. data/tests/wellformed/rss/channel_link.xml +9 -0
  2822. data/tests/wellformed/rss/channel_managingEditor.xml +9 -0
  2823. data/tests/wellformed/rss/channel_managingEditor_map_author_detail_email.xml +9 -0
  2824. data/tests/wellformed/rss/channel_managingEditor_map_author_detail_name.xml +9 -0
  2825. data/tests/wellformed/rss/channel_textInput_description.xml +14 -0
  2826. data/tests/wellformed/rss/channel_textInput_description_conflict.xml +12 -0
  2827. data/tests/wellformed/rss/channel_textInput_link.xml +12 -0
  2828. data/tests/wellformed/rss/channel_textInput_link_conflict.xml +12 -0
  2829. data/tests/wellformed/rss/channel_textInput_name.xml +11 -0
  2830. data/tests/wellformed/rss/channel_textInput_title.xml +12 -0
  2831. data/tests/wellformed/rss/channel_textInput_title_conflict.xml +12 -0
  2832. data/tests/wellformed/rss/channel_title.xml +9 -0
  2833. data/tests/wellformed/rss/channel_title_apos.xml +9 -0
  2834. data/tests/wellformed/rss/channel_title_gt.xml +9 -0
  2835. data/tests/wellformed/rss/channel_title_lt.xml +9 -0
  2836. data/tests/wellformed/rss/channel_ttl.xml +9 -0
  2837. data/tests/wellformed/rss/channel_webMaster.xml +9 -0
  2838. data/tests/wellformed/rss/channel_webMaster_email.xml +9 -0
  2839. data/tests/wellformed/rss/channel_webMaster_name.xml +9 -0
  2840. data/tests/wellformed/rss/item_author.xml +11 -0
  2841. data/tests/wellformed/rss/item_author_map_author_detail_email.xml +11 -0
  2842. data/tests/wellformed/rss/item_author_map_author_detail_name.xml +11 -0
  2843. data/tests/wellformed/rss/item_category.xml +11 -0
  2844. data/tests/wellformed/rss/item_category_domain.xml +11 -0
  2845. data/tests/wellformed/rss/item_category_multiple.xml +12 -0
  2846. data/tests/wellformed/rss/item_category_multiple_2.xml +12 -0
  2847. data/tests/wellformed/rss/item_comments.xml +11 -0
  2848. data/tests/wellformed/rss/item_content_encoded.xml +11 -0
  2849. data/tests/wellformed/rss/item_content_encoded_mode.xml +11 -0
  2850. data/tests/wellformed/rss/item_content_encoded_type.xml +11 -0
  2851. data/tests/wellformed/rss/item_dc_author.xml +11 -0
  2852. data/tests/wellformed/rss/item_dc_author_map_author_detail_email.xml +11 -0
  2853. data/tests/wellformed/rss/item_dc_author_map_author_detail_name.xml +11 -0
  2854. data/tests/wellformed/rss/item_dc_contributor.xml +11 -0
  2855. data/tests/wellformed/rss/item_dc_creator.xml +11 -0
  2856. data/tests/wellformed/rss/item_dc_creator_map_author_detail_email.xml +11 -0
  2857. data/tests/wellformed/rss/item_dc_creator_map_author_detail_name.xml +11 -0
  2858. data/tests/wellformed/rss/item_dc_publisher.xml +11 -0
  2859. data/tests/wellformed/rss/item_dc_publisher_email.xml +11 -0
  2860. data/tests/wellformed/rss/item_dc_publisher_name.xml +11 -0
  2861. data/tests/wellformed/rss/item_dc_rights.xml +11 -0
  2862. data/tests/wellformed/rss/item_dc_subject.xml +11 -0
  2863. data/tests/wellformed/rss/item_dc_subject_2.xml +11 -0
  2864. data/tests/wellformed/rss/item_dc_subject_multiple.xml +12 -0
  2865. data/tests/wellformed/rss/item_dc_title.xml +11 -0
  2866. data/tests/wellformed/rss/item_description.xml +11 -0
  2867. data/tests/wellformed/rss/item_description_and_summary.xml +12 -0
  2868. data/tests/wellformed/rss/item_description_br.xml +11 -0
  2869. data/tests/wellformed/rss/item_description_br_shorttag.xml +12 -0
  2870. data/tests/wellformed/rss/item_description_escaped_markup.xml +11 -0
  2871. data/tests/wellformed/rss/item_description_map_summary.xml +11 -0
  2872. data/tests/wellformed/rss/item_description_naked_markup.xml +11 -0
  2873. data/tests/wellformed/rss/item_description_not_a_doctype.xml +9 -0
  2874. data/tests/wellformed/rss/item_enclosure_length.xml +12 -0
  2875. data/tests/wellformed/rss/item_enclosure_multiple.xml +13 -0
  2876. data/tests/wellformed/rss/item_enclosure_type.xml +12 -0
  2877. data/tests/wellformed/rss/item_enclosure_url.xml +12 -0
  2878. data/tests/wellformed/rss/item_fullitem.xml +11 -0
  2879. data/tests/wellformed/rss/item_fullitem_mode.xml +11 -0
  2880. data/tests/wellformed/rss/item_fullitem_type.xml +11 -0
  2881. data/tests/wellformed/rss/item_guid.xml +11 -0
  2882. data/tests/wellformed/rss/item_guid_conflict_link.xml +12 -0
  2883. data/tests/wellformed/rss/item_guid_guidislink.xml +11 -0
  2884. data/tests/wellformed/rss/item_guid_isPermaLink_conflict_link.xml +12 -0
  2885. data/tests/wellformed/rss/item_guid_isPermaLink_conflict_link_not_guidislink.xml +12 -0
  2886. data/tests/wellformed/rss/item_guid_isPermaLink_guidislink.xml +11 -0
  2887. data/tests/wellformed/rss/item_guid_isPermaLink_map_link.xml +11 -0
  2888. data/tests/wellformed/rss/item_guid_map_link.xml +11 -0
  2889. data/tests/wellformed/rss/item_guid_not_permalink.xml +11 -0
  2890. data/tests/wellformed/rss/item_guid_not_permalink_conflict_link.xml +12 -0
  2891. data/tests/wellformed/rss/item_guid_not_permalink_not_guidislink.xml +11 -0
  2892. data/tests/wellformed/rss/item_guid_not_permalink_not_guidislink_2.xml +12 -0
  2893. data/tests/wellformed/rss/item_link.xml +11 -0
  2894. data/tests/wellformed/rss/item_source.xml +12 -0
  2895. data/tests/wellformed/rss/item_source_url.xml +12 -0
  2896. data/tests/wellformed/rss/item_summary_and_description.xml +12 -0
  2897. data/tests/wellformed/rss/item_title.xml +11 -0
  2898. data/tests/wellformed/rss/item_xhtml_body.xml +13 -0
  2899. data/tests/wellformed/rss/item_xhtml_body_mode.xml +13 -0
  2900. data/tests/wellformed/rss/item_xhtml_body_type.xml +13 -0
  2901. data/tests/wellformed/rss/rss_namespace_1.xml +9 -0
  2902. data/tests/wellformed/rss/rss_namespace_2.xml +9 -0
  2903. data/tests/wellformed/rss/rss_namespace_3.xml +9 -0
  2904. data/tests/wellformed/rss/rss_namespace_4.xml +9 -0
  2905. data/tests/wellformed/rss/rss_version_090.xml +6 -0
  2906. data/tests/wellformed/rss/rss_version_091_netscape.xml +7 -0
  2907. data/tests/wellformed/rss/rss_version_091_userland.xml +6 -0
  2908. data/tests/wellformed/rss/rss_version_092.xml +6 -0
  2909. data/tests/wellformed/rss/rss_version_093.xml +6 -0
  2910. data/tests/wellformed/rss/rss_version_094.xml +6 -0
  2911. data/tests/wellformed/rss/rss_version_20.xml +6 -0
  2912. data/tests/wellformed/rss/rss_version_201.xml +6 -0
  2913. data/tests/wellformed/rss/rss_version_21.xml +6 -0
  2914. data/tests/wellformed/rss/rss_version_missing.xml +9 -0
  2915. data/tests/wellformed/sanitize/entry_content_applet.xml +9 -0
  2916. data/tests/wellformed/sanitize/entry_content_blink.xml +9 -0
  2917. data/tests/wellformed/sanitize/entry_content_crazy.xml +75 -0
  2918. data/tests/wellformed/sanitize/entry_content_embed.xml +9 -0
  2919. data/tests/wellformed/sanitize/entry_content_frame.xml +9 -0
  2920. data/tests/wellformed/sanitize/entry_content_iframe.xml +9 -0
  2921. data/tests/wellformed/sanitize/entry_content_link.xml +9 -0
  2922. data/tests/wellformed/sanitize/entry_content_meta.xml +9 -0
  2923. data/tests/wellformed/sanitize/entry_content_object.xml +9 -0
  2924. data/tests/wellformed/sanitize/entry_content_onabort.xml +9 -0
  2925. data/tests/wellformed/sanitize/entry_content_onblur.xml +9 -0
  2926. data/tests/wellformed/sanitize/entry_content_onchange.xml +9 -0
  2927. data/tests/wellformed/sanitize/entry_content_onclick.xml +9 -0
  2928. data/tests/wellformed/sanitize/entry_content_ondblclick.xml +9 -0
  2929. data/tests/wellformed/sanitize/entry_content_onerror.xml +9 -0
  2930. data/tests/wellformed/sanitize/entry_content_onfocus.xml +9 -0
  2931. data/tests/wellformed/sanitize/entry_content_onkeydown.xml +9 -0
  2932. data/tests/wellformed/sanitize/entry_content_onkeypress.xml +9 -0
  2933. data/tests/wellformed/sanitize/entry_content_onkeyup.xml +9 -0
  2934. data/tests/wellformed/sanitize/entry_content_onload.xml +9 -0
  2935. data/tests/wellformed/sanitize/entry_content_onmousedown.xml +9 -0
  2936. data/tests/wellformed/sanitize/entry_content_onmouseout.xml +9 -0
  2937. data/tests/wellformed/sanitize/entry_content_onmouseover.xml +9 -0
  2938. data/tests/wellformed/sanitize/entry_content_onmouseup.xml +9 -0
  2939. data/tests/wellformed/sanitize/entry_content_onreset.xml +9 -0
  2940. data/tests/wellformed/sanitize/entry_content_onresize.xml +9 -0
  2941. data/tests/wellformed/sanitize/entry_content_onsubmit.xml +9 -0
  2942. data/tests/wellformed/sanitize/entry_content_onunload.xml +9 -0
  2943. data/tests/wellformed/sanitize/entry_content_script.xml +9 -0
  2944. data/tests/wellformed/sanitize/entry_content_script_base64.xml +12 -0
  2945. data/tests/wellformed/sanitize/entry_content_script_cdata.xml +9 -0
  2946. data/tests/wellformed/sanitize/entry_content_script_inline.xml +9 -0
  2947. data/tests/wellformed/sanitize/entry_content_style.xml +9 -0
  2948. data/tests/wellformed/sanitize/entry_summary_applet.xml +9 -0
  2949. data/tests/wellformed/sanitize/entry_summary_blink.xml +9 -0
  2950. data/tests/wellformed/sanitize/entry_summary_crazy.xml +75 -0
  2951. data/tests/wellformed/sanitize/entry_summary_embed.xml +9 -0
  2952. data/tests/wellformed/sanitize/entry_summary_frame.xml +9 -0
  2953. data/tests/wellformed/sanitize/entry_summary_iframe.xml +9 -0
  2954. data/tests/wellformed/sanitize/entry_summary_link.xml +9 -0
  2955. data/tests/wellformed/sanitize/entry_summary_meta.xml +9 -0
  2956. data/tests/wellformed/sanitize/entry_summary_object.xml +9 -0
  2957. data/tests/wellformed/sanitize/entry_summary_onabort.xml +9 -0
  2958. data/tests/wellformed/sanitize/entry_summary_onblur.xml +9 -0
  2959. data/tests/wellformed/sanitize/entry_summary_onchange.xml +9 -0
  2960. data/tests/wellformed/sanitize/entry_summary_onclick.xml +9 -0
  2961. data/tests/wellformed/sanitize/entry_summary_ondblclick.xml +9 -0
  2962. data/tests/wellformed/sanitize/entry_summary_onerror.xml +9 -0
  2963. data/tests/wellformed/sanitize/entry_summary_onfocus.xml +9 -0
  2964. data/tests/wellformed/sanitize/entry_summary_onkeydown.xml +9 -0
  2965. data/tests/wellformed/sanitize/entry_summary_onkeypress.xml +9 -0
  2966. data/tests/wellformed/sanitize/entry_summary_onkeyup.xml +9 -0
  2967. data/tests/wellformed/sanitize/entry_summary_onload.xml +9 -0
  2968. data/tests/wellformed/sanitize/entry_summary_onmousedown.xml +9 -0
  2969. data/tests/wellformed/sanitize/entry_summary_onmouseout.xml +9 -0
  2970. data/tests/wellformed/sanitize/entry_summary_onmouseover.xml +9 -0
  2971. data/tests/wellformed/sanitize/entry_summary_onmouseup.xml +9 -0
  2972. data/tests/wellformed/sanitize/entry_summary_onreset.xml +9 -0
  2973. data/tests/wellformed/sanitize/entry_summary_onresize.xml +9 -0
  2974. data/tests/wellformed/sanitize/entry_summary_onsubmit.xml +9 -0
  2975. data/tests/wellformed/sanitize/entry_summary_onunload.xml +9 -0
  2976. data/tests/wellformed/sanitize/entry_summary_script.xml +9 -0
  2977. data/tests/wellformed/sanitize/entry_summary_script_base64.xml +12 -0
  2978. data/tests/wellformed/sanitize/entry_summary_script_cdata.xml +9 -0
  2979. data/tests/wellformed/sanitize/entry_summary_script_inline.xml +9 -0
  2980. data/tests/wellformed/sanitize/entry_summary_script_map_description.xml +9 -0
  2981. data/tests/wellformed/sanitize/entry_summary_style.xml +9 -0
  2982. data/tests/wellformed/sanitize/entry_title_applet.xml +9 -0
  2983. data/tests/wellformed/sanitize/entry_title_blink.xml +9 -0
  2984. data/tests/wellformed/sanitize/entry_title_crazy.xml +75 -0
  2985. data/tests/wellformed/sanitize/entry_title_embed.xml +9 -0
  2986. data/tests/wellformed/sanitize/entry_title_frame.xml +9 -0
  2987. data/tests/wellformed/sanitize/entry_title_iframe.xml +9 -0
  2988. data/tests/wellformed/sanitize/entry_title_link.xml +9 -0
  2989. data/tests/wellformed/sanitize/entry_title_meta.xml +9 -0
  2990. data/tests/wellformed/sanitize/entry_title_object.xml +9 -0
  2991. data/tests/wellformed/sanitize/entry_title_onabort.xml +9 -0
  2992. data/tests/wellformed/sanitize/entry_title_onblur.xml +9 -0
  2993. data/tests/wellformed/sanitize/entry_title_onchange.xml +9 -0
  2994. data/tests/wellformed/sanitize/entry_title_onclick.xml +9 -0
  2995. data/tests/wellformed/sanitize/entry_title_ondblclick.xml +9 -0
  2996. data/tests/wellformed/sanitize/entry_title_onerror.xml +9 -0
  2997. data/tests/wellformed/sanitize/entry_title_onfocus.xml +9 -0
  2998. data/tests/wellformed/sanitize/entry_title_onkeydown.xml +9 -0
  2999. data/tests/wellformed/sanitize/entry_title_onkeypress.xml +9 -0
  3000. data/tests/wellformed/sanitize/entry_title_onkeyup.xml +9 -0
  3001. data/tests/wellformed/sanitize/entry_title_onload.xml +9 -0
  3002. data/tests/wellformed/sanitize/entry_title_onmousedown.xml +9 -0
  3003. data/tests/wellformed/sanitize/entry_title_onmouseout.xml +9 -0
  3004. data/tests/wellformed/sanitize/entry_title_onmouseover.xml +9 -0
  3005. data/tests/wellformed/sanitize/entry_title_onmouseup.xml +9 -0
  3006. data/tests/wellformed/sanitize/entry_title_onreset.xml +9 -0
  3007. data/tests/wellformed/sanitize/entry_title_onresize.xml +9 -0
  3008. data/tests/wellformed/sanitize/entry_title_onsubmit.xml +9 -0
  3009. data/tests/wellformed/sanitize/entry_title_onunload.xml +9 -0
  3010. data/tests/wellformed/sanitize/entry_title_script.xml +9 -0
  3011. data/tests/wellformed/sanitize/entry_title_script_cdata.xml +9 -0
  3012. data/tests/wellformed/sanitize/entry_title_script_inline.xml +9 -0
  3013. data/tests/wellformed/sanitize/entry_title_style.xml +9 -0
  3014. data/tests/wellformed/sanitize/feed_copyright_applet.xml +7 -0
  3015. data/tests/wellformed/sanitize/feed_copyright_blink.xml +7 -0
  3016. data/tests/wellformed/sanitize/feed_copyright_crazy.xml +73 -0
  3017. data/tests/wellformed/sanitize/feed_copyright_embed.xml +7 -0
  3018. data/tests/wellformed/sanitize/feed_copyright_frame.xml +7 -0
  3019. data/tests/wellformed/sanitize/feed_copyright_iframe.xml +7 -0
  3020. data/tests/wellformed/sanitize/feed_copyright_link.xml +7 -0
  3021. data/tests/wellformed/sanitize/feed_copyright_meta.xml +7 -0
  3022. data/tests/wellformed/sanitize/feed_copyright_object.xml +7 -0
  3023. data/tests/wellformed/sanitize/feed_copyright_onabort.xml +7 -0
  3024. data/tests/wellformed/sanitize/feed_copyright_onblur.xml +7 -0
  3025. data/tests/wellformed/sanitize/feed_copyright_onchange.xml +7 -0
  3026. data/tests/wellformed/sanitize/feed_copyright_onclick.xml +7 -0
  3027. data/tests/wellformed/sanitize/feed_copyright_ondblclick.xml +7 -0
  3028. data/tests/wellformed/sanitize/feed_copyright_onerror.xml +7 -0
  3029. data/tests/wellformed/sanitize/feed_copyright_onfocus.xml +7 -0
  3030. data/tests/wellformed/sanitize/feed_copyright_onkeydown.xml +7 -0
  3031. data/tests/wellformed/sanitize/feed_copyright_onkeypress.xml +7 -0
  3032. data/tests/wellformed/sanitize/feed_copyright_onkeyup.xml +7 -0
  3033. data/tests/wellformed/sanitize/feed_copyright_onload.xml +7 -0
  3034. data/tests/wellformed/sanitize/feed_copyright_onmousedown.xml +7 -0
  3035. data/tests/wellformed/sanitize/feed_copyright_onmouseout.xml +7 -0
  3036. data/tests/wellformed/sanitize/feed_copyright_onmouseover.xml +7 -0
  3037. data/tests/wellformed/sanitize/feed_copyright_onmouseup.xml +7 -0
  3038. data/tests/wellformed/sanitize/feed_copyright_onreset.xml +7 -0
  3039. data/tests/wellformed/sanitize/feed_copyright_onresize.xml +7 -0
  3040. data/tests/wellformed/sanitize/feed_copyright_onsubmit.xml +7 -0
  3041. data/tests/wellformed/sanitize/feed_copyright_onunload.xml +7 -0
  3042. data/tests/wellformed/sanitize/feed_copyright_script.xml +7 -0
  3043. data/tests/wellformed/sanitize/feed_copyright_script_cdata.xml +7 -0
  3044. data/tests/wellformed/sanitize/feed_copyright_script_inline.xml +7 -0
  3045. data/tests/wellformed/sanitize/feed_copyright_style.xml +7 -0
  3046. data/tests/wellformed/sanitize/feed_info_applet.xml +7 -0
  3047. data/tests/wellformed/sanitize/feed_info_blink.xml +7 -0
  3048. data/tests/wellformed/sanitize/feed_info_crazy.xml +73 -0
  3049. data/tests/wellformed/sanitize/feed_info_embed.xml +7 -0
  3050. data/tests/wellformed/sanitize/feed_info_frame.xml +7 -0
  3051. data/tests/wellformed/sanitize/feed_info_iframe.xml +7 -0
  3052. data/tests/wellformed/sanitize/feed_info_link.xml +7 -0
  3053. data/tests/wellformed/sanitize/feed_info_meta.xml +7 -0
  3054. data/tests/wellformed/sanitize/feed_info_object.xml +7 -0
  3055. data/tests/wellformed/sanitize/feed_info_onabort.xml +7 -0
  3056. data/tests/wellformed/sanitize/feed_info_onblur.xml +7 -0
  3057. data/tests/wellformed/sanitize/feed_info_onchange.xml +7 -0
  3058. data/tests/wellformed/sanitize/feed_info_onclick.xml +7 -0
  3059. data/tests/wellformed/sanitize/feed_info_ondblclick.xml +7 -0
  3060. data/tests/wellformed/sanitize/feed_info_onerror.xml +7 -0
  3061. data/tests/wellformed/sanitize/feed_info_onfocus.xml +7 -0
  3062. data/tests/wellformed/sanitize/feed_info_onkeydown.xml +7 -0
  3063. data/tests/wellformed/sanitize/feed_info_onkeypress.xml +7 -0
  3064. data/tests/wellformed/sanitize/feed_info_onkeyup.xml +7 -0
  3065. data/tests/wellformed/sanitize/feed_info_onload.xml +7 -0
  3066. data/tests/wellformed/sanitize/feed_info_onmousedown.xml +7 -0
  3067. data/tests/wellformed/sanitize/feed_info_onmouseout.xml +7 -0
  3068. data/tests/wellformed/sanitize/feed_info_onmouseover.xml +7 -0
  3069. data/tests/wellformed/sanitize/feed_info_onmouseup.xml +7 -0
  3070. data/tests/wellformed/sanitize/feed_info_onreset.xml +7 -0
  3071. data/tests/wellformed/sanitize/feed_info_onresize.xml +7 -0
  3072. data/tests/wellformed/sanitize/feed_info_onsubmit.xml +7 -0
  3073. data/tests/wellformed/sanitize/feed_info_onunload.xml +7 -0
  3074. data/tests/wellformed/sanitize/feed_info_script.xml +7 -0
  3075. data/tests/wellformed/sanitize/feed_info_script_cdata.xml +7 -0
  3076. data/tests/wellformed/sanitize/feed_info_script_inline.xml +7 -0
  3077. data/tests/wellformed/sanitize/feed_info_style.xml +7 -0
  3078. data/tests/wellformed/sanitize/feed_subtitle_applet.xml +7 -0
  3079. data/tests/wellformed/sanitize/feed_subtitle_blink.xml +7 -0
  3080. data/tests/wellformed/sanitize/feed_subtitle_crazy.xml +73 -0
  3081. data/tests/wellformed/sanitize/feed_subtitle_embed.xml +7 -0
  3082. data/tests/wellformed/sanitize/feed_subtitle_frame.xml +7 -0
  3083. data/tests/wellformed/sanitize/feed_subtitle_iframe.xml +7 -0
  3084. data/tests/wellformed/sanitize/feed_subtitle_link.xml +7 -0
  3085. data/tests/wellformed/sanitize/feed_subtitle_meta.xml +7 -0
  3086. data/tests/wellformed/sanitize/feed_subtitle_object.xml +7 -0
  3087. data/tests/wellformed/sanitize/feed_subtitle_onabort.xml +7 -0
  3088. data/tests/wellformed/sanitize/feed_subtitle_onblur.xml +7 -0
  3089. data/tests/wellformed/sanitize/feed_subtitle_onchange.xml +7 -0
  3090. data/tests/wellformed/sanitize/feed_subtitle_onclick.xml +7 -0
  3091. data/tests/wellformed/sanitize/feed_subtitle_ondblclick.xml +7 -0
  3092. data/tests/wellformed/sanitize/feed_subtitle_onerror.xml +7 -0
  3093. data/tests/wellformed/sanitize/feed_subtitle_onfocus.xml +7 -0
  3094. data/tests/wellformed/sanitize/feed_subtitle_onkeydown.xml +7 -0
  3095. data/tests/wellformed/sanitize/feed_subtitle_onkeypress.xml +7 -0
  3096. data/tests/wellformed/sanitize/feed_subtitle_onkeyup.xml +7 -0
  3097. data/tests/wellformed/sanitize/feed_subtitle_onload.xml +7 -0
  3098. data/tests/wellformed/sanitize/feed_subtitle_onmousedown.xml +7 -0
  3099. data/tests/wellformed/sanitize/feed_subtitle_onmouseout.xml +7 -0
  3100. data/tests/wellformed/sanitize/feed_subtitle_onmouseover.xml +7 -0
  3101. data/tests/wellformed/sanitize/feed_subtitle_onmouseup.xml +7 -0
  3102. data/tests/wellformed/sanitize/feed_subtitle_onreset.xml +7 -0
  3103. data/tests/wellformed/sanitize/feed_subtitle_onresize.xml +7 -0
  3104. data/tests/wellformed/sanitize/feed_subtitle_onsubmit.xml +7 -0
  3105. data/tests/wellformed/sanitize/feed_subtitle_onunload.xml +7 -0
  3106. data/tests/wellformed/sanitize/feed_subtitle_script.xml +7 -0
  3107. data/tests/wellformed/sanitize/feed_subtitle_script_cdata.xml +7 -0
  3108. data/tests/wellformed/sanitize/feed_subtitle_script_inline.xml +7 -0
  3109. data/tests/wellformed/sanitize/feed_subtitle_style.xml +7 -0
  3110. data/tests/wellformed/sanitize/feed_tagline_applet.xml +7 -0
  3111. data/tests/wellformed/sanitize/feed_tagline_blink.xml +7 -0
  3112. data/tests/wellformed/sanitize/feed_tagline_crazy.xml +73 -0
  3113. data/tests/wellformed/sanitize/feed_tagline_embed.xml +7 -0
  3114. data/tests/wellformed/sanitize/feed_tagline_frame.xml +7 -0
  3115. data/tests/wellformed/sanitize/feed_tagline_iframe.xml +7 -0
  3116. data/tests/wellformed/sanitize/feed_tagline_link.xml +7 -0
  3117. data/tests/wellformed/sanitize/feed_tagline_meta.xml +7 -0
  3118. data/tests/wellformed/sanitize/feed_tagline_object.xml +7 -0
  3119. data/tests/wellformed/sanitize/feed_tagline_onabort.xml +7 -0
  3120. data/tests/wellformed/sanitize/feed_tagline_onblur.xml +7 -0
  3121. data/tests/wellformed/sanitize/feed_tagline_onchange.xml +7 -0
  3122. data/tests/wellformed/sanitize/feed_tagline_onclick.xml +7 -0
  3123. data/tests/wellformed/sanitize/feed_tagline_ondblclick.xml +7 -0
  3124. data/tests/wellformed/sanitize/feed_tagline_onerror.xml +7 -0
  3125. data/tests/wellformed/sanitize/feed_tagline_onfocus.xml +7 -0
  3126. data/tests/wellformed/sanitize/feed_tagline_onkeydown.xml +7 -0
  3127. data/tests/wellformed/sanitize/feed_tagline_onkeypress.xml +7 -0
  3128. data/tests/wellformed/sanitize/feed_tagline_onkeyup.xml +7 -0
  3129. data/tests/wellformed/sanitize/feed_tagline_onload.xml +7 -0
  3130. data/tests/wellformed/sanitize/feed_tagline_onmousedown.xml +7 -0
  3131. data/tests/wellformed/sanitize/feed_tagline_onmouseout.xml +7 -0
  3132. data/tests/wellformed/sanitize/feed_tagline_onmouseover.xml +7 -0
  3133. data/tests/wellformed/sanitize/feed_tagline_onmouseup.xml +7 -0
  3134. data/tests/wellformed/sanitize/feed_tagline_onreset.xml +7 -0
  3135. data/tests/wellformed/sanitize/feed_tagline_onresize.xml +7 -0
  3136. data/tests/wellformed/sanitize/feed_tagline_onsubmit.xml +7 -0
  3137. data/tests/wellformed/sanitize/feed_tagline_onunload.xml +7 -0
  3138. data/tests/wellformed/sanitize/feed_tagline_script.xml +7 -0
  3139. data/tests/wellformed/sanitize/feed_tagline_script_cdata.xml +7 -0
  3140. data/tests/wellformed/sanitize/feed_tagline_script_inline.xml +7 -0
  3141. data/tests/wellformed/sanitize/feed_tagline_script_map_description.xml +7 -0
  3142. data/tests/wellformed/sanitize/feed_tagline_style.xml +7 -0
  3143. data/tests/wellformed/sanitize/feed_title_applet.xml +7 -0
  3144. data/tests/wellformed/sanitize/feed_title_blink.xml +7 -0
  3145. data/tests/wellformed/sanitize/feed_title_crazy.xml +73 -0
  3146. data/tests/wellformed/sanitize/feed_title_embed.xml +7 -0
  3147. data/tests/wellformed/sanitize/feed_title_frame.xml +7 -0
  3148. data/tests/wellformed/sanitize/feed_title_iframe.xml +7 -0
  3149. data/tests/wellformed/sanitize/feed_title_link.xml +7 -0
  3150. data/tests/wellformed/sanitize/feed_title_meta.xml +7 -0
  3151. data/tests/wellformed/sanitize/feed_title_object.xml +7 -0
  3152. data/tests/wellformed/sanitize/feed_title_onabort.xml +7 -0
  3153. data/tests/wellformed/sanitize/feed_title_onblur.xml +7 -0
  3154. data/tests/wellformed/sanitize/feed_title_onchange.xml +7 -0
  3155. data/tests/wellformed/sanitize/feed_title_onclick.xml +7 -0
  3156. data/tests/wellformed/sanitize/feed_title_ondblclick.xml +7 -0
  3157. data/tests/wellformed/sanitize/feed_title_onerror.xml +7 -0
  3158. data/tests/wellformed/sanitize/feed_title_onfocus.xml +7 -0
  3159. data/tests/wellformed/sanitize/feed_title_onkeydown.xml +7 -0
  3160. data/tests/wellformed/sanitize/feed_title_onkeypress.xml +7 -0
  3161. data/tests/wellformed/sanitize/feed_title_onkeyup.xml +7 -0
  3162. data/tests/wellformed/sanitize/feed_title_onload.xml +7 -0
  3163. data/tests/wellformed/sanitize/feed_title_onmousedown.xml +7 -0
  3164. data/tests/wellformed/sanitize/feed_title_onmouseout.xml +7 -0
  3165. data/tests/wellformed/sanitize/feed_title_onmouseover.xml +7 -0
  3166. data/tests/wellformed/sanitize/feed_title_onmouseup.xml +7 -0
  3167. data/tests/wellformed/sanitize/feed_title_onreset.xml +7 -0
  3168. data/tests/wellformed/sanitize/feed_title_onresize.xml +7 -0
  3169. data/tests/wellformed/sanitize/feed_title_onsubmit.xml +7 -0
  3170. data/tests/wellformed/sanitize/feed_title_onunload.xml +7 -0
  3171. data/tests/wellformed/sanitize/feed_title_script.xml +7 -0
  3172. data/tests/wellformed/sanitize/feed_title_script_cdata.xml +7 -0
  3173. data/tests/wellformed/sanitize/feed_title_script_inline.xml +7 -0
  3174. data/tests/wellformed/sanitize/feed_title_style.xml +7 -0
  3175. data/tests/wellformed/sanitize/item_body_applet.xml +11 -0
  3176. data/tests/wellformed/sanitize/item_body_blink.xml +11 -0
  3177. data/tests/wellformed/sanitize/item_body_embed.xml +11 -0
  3178. data/tests/wellformed/sanitize/item_body_frame.xml +11 -0
  3179. data/tests/wellformed/sanitize/item_body_iframe.xml +11 -0
  3180. data/tests/wellformed/sanitize/item_body_link.xml +11 -0
  3181. data/tests/wellformed/sanitize/item_body_meta.xml +11 -0
  3182. data/tests/wellformed/sanitize/item_body_object.xml +11 -0
  3183. data/tests/wellformed/sanitize/item_body_onabort.xml +11 -0
  3184. data/tests/wellformed/sanitize/item_body_onblur.xml +11 -0
  3185. data/tests/wellformed/sanitize/item_body_onchange.xml +11 -0
  3186. data/tests/wellformed/sanitize/item_body_onclick.xml +11 -0
  3187. data/tests/wellformed/sanitize/item_body_ondblclick.xml +11 -0
  3188. data/tests/wellformed/sanitize/item_body_onerror.xml +11 -0
  3189. data/tests/wellformed/sanitize/item_body_onfocus.xml +11 -0
  3190. data/tests/wellformed/sanitize/item_body_onkeydown.xml +11 -0
  3191. data/tests/wellformed/sanitize/item_body_onkeypress.xml +11 -0
  3192. data/tests/wellformed/sanitize/item_body_onkeyup.xml +11 -0
  3193. data/tests/wellformed/sanitize/item_body_onload.xml +11 -0
  3194. data/tests/wellformed/sanitize/item_body_onmousedown.xml +11 -0
  3195. data/tests/wellformed/sanitize/item_body_onmouseout.xml +11 -0
  3196. data/tests/wellformed/sanitize/item_body_onmouseover.xml +11 -0
  3197. data/tests/wellformed/sanitize/item_body_onmouseup.xml +11 -0
  3198. data/tests/wellformed/sanitize/item_body_onreset.xml +11 -0
  3199. data/tests/wellformed/sanitize/item_body_onresize.xml +11 -0
  3200. data/tests/wellformed/sanitize/item_body_onsubmit.xml +11 -0
  3201. data/tests/wellformed/sanitize/item_body_onunload.xml +11 -0
  3202. data/tests/wellformed/sanitize/item_body_script.xml +11 -0
  3203. data/tests/wellformed/sanitize/item_body_script_map_content.xml +11 -0
  3204. data/tests/wellformed/sanitize/item_body_style.xml +11 -0
  3205. data/tests/wellformed/sanitize/item_content_encoded_applet.xml +11 -0
  3206. data/tests/wellformed/sanitize/item_content_encoded_blink.xml +11 -0
  3207. data/tests/wellformed/sanitize/item_content_encoded_crazy.xml +77 -0
  3208. data/tests/wellformed/sanitize/item_content_encoded_embed.xml +11 -0
  3209. data/tests/wellformed/sanitize/item_content_encoded_frame.xml +11 -0
  3210. data/tests/wellformed/sanitize/item_content_encoded_iframe.xml +11 -0
  3211. data/tests/wellformed/sanitize/item_content_encoded_link.xml +11 -0
  3212. data/tests/wellformed/sanitize/item_content_encoded_map_content.xml +11 -0
  3213. data/tests/wellformed/sanitize/item_content_encoded_meta.xml +11 -0
  3214. data/tests/wellformed/sanitize/item_content_encoded_object.xml +11 -0
  3215. data/tests/wellformed/sanitize/item_content_encoded_onabort.xml +11 -0
  3216. data/tests/wellformed/sanitize/item_content_encoded_onblur.xml +11 -0
  3217. data/tests/wellformed/sanitize/item_content_encoded_onchange.xml +11 -0
  3218. data/tests/wellformed/sanitize/item_content_encoded_onclick.xml +11 -0
  3219. data/tests/wellformed/sanitize/item_content_encoded_ondblclick.xml +11 -0
  3220. data/tests/wellformed/sanitize/item_content_encoded_onerror.xml +11 -0
  3221. data/tests/wellformed/sanitize/item_content_encoded_onfocus.xml +11 -0
  3222. data/tests/wellformed/sanitize/item_content_encoded_onkeydown.xml +11 -0
  3223. data/tests/wellformed/sanitize/item_content_encoded_onkeypress.xml +11 -0
  3224. data/tests/wellformed/sanitize/item_content_encoded_onkeyup.xml +11 -0
  3225. data/tests/wellformed/sanitize/item_content_encoded_onload.xml +11 -0
  3226. data/tests/wellformed/sanitize/item_content_encoded_onmousedown.xml +11 -0
  3227. data/tests/wellformed/sanitize/item_content_encoded_onmouseout.xml +11 -0
  3228. data/tests/wellformed/sanitize/item_content_encoded_onmouseover.xml +11 -0
  3229. data/tests/wellformed/sanitize/item_content_encoded_onmouseup.xml +11 -0
  3230. data/tests/wellformed/sanitize/item_content_encoded_onreset.xml +11 -0
  3231. data/tests/wellformed/sanitize/item_content_encoded_onresize.xml +11 -0
  3232. data/tests/wellformed/sanitize/item_content_encoded_onsubmit.xml +11 -0
  3233. data/tests/wellformed/sanitize/item_content_encoded_onunload.xml +11 -0
  3234. data/tests/wellformed/sanitize/item_content_encoded_script.xml +11 -0
  3235. data/tests/wellformed/sanitize/item_content_encoded_script_cdata.xml +11 -0
  3236. data/tests/wellformed/sanitize/item_content_encoded_script_map_content.xml +11 -0
  3237. data/tests/wellformed/sanitize/item_content_encoded_style.xml +11 -0
  3238. data/tests/wellformed/sanitize/item_description_applet.xml +11 -0
  3239. data/tests/wellformed/sanitize/item_description_blink.xml +11 -0
  3240. data/tests/wellformed/sanitize/item_description_crazy.xml +81 -0
  3241. data/tests/wellformed/sanitize/item_description_embed.xml +11 -0
  3242. data/tests/wellformed/sanitize/item_description_frame.xml +11 -0
  3243. data/tests/wellformed/sanitize/item_description_iframe.xml +11 -0
  3244. data/tests/wellformed/sanitize/item_description_link.xml +11 -0
  3245. data/tests/wellformed/sanitize/item_description_meta.xml +11 -0
  3246. data/tests/wellformed/sanitize/item_description_object.xml +11 -0
  3247. data/tests/wellformed/sanitize/item_description_onabort.xml +11 -0
  3248. data/tests/wellformed/sanitize/item_description_onblur.xml +11 -0
  3249. data/tests/wellformed/sanitize/item_description_onchange.xml +11 -0
  3250. data/tests/wellformed/sanitize/item_description_onclick.xml +11 -0
  3251. data/tests/wellformed/sanitize/item_description_ondblclick.xml +11 -0
  3252. data/tests/wellformed/sanitize/item_description_onerror.xml +11 -0
  3253. data/tests/wellformed/sanitize/item_description_onfocus.xml +11 -0
  3254. data/tests/wellformed/sanitize/item_description_onkeydown.xml +11 -0
  3255. data/tests/wellformed/sanitize/item_description_onkeypress.xml +11 -0
  3256. data/tests/wellformed/sanitize/item_description_onkeyup.xml +11 -0
  3257. data/tests/wellformed/sanitize/item_description_onload.xml +11 -0
  3258. data/tests/wellformed/sanitize/item_description_onmousedown.xml +11 -0
  3259. data/tests/wellformed/sanitize/item_description_onmouseout.xml +11 -0
  3260. data/tests/wellformed/sanitize/item_description_onmouseover.xml +11 -0
  3261. data/tests/wellformed/sanitize/item_description_onmouseup.xml +11 -0
  3262. data/tests/wellformed/sanitize/item_description_onreset.xml +11 -0
  3263. data/tests/wellformed/sanitize/item_description_onresize.xml +11 -0
  3264. data/tests/wellformed/sanitize/item_description_onsubmit.xml +11 -0
  3265. data/tests/wellformed/sanitize/item_description_onunload.xml +11 -0
  3266. data/tests/wellformed/sanitize/item_description_script.xml +11 -0
  3267. data/tests/wellformed/sanitize/item_description_script_cdata.xml +11 -0
  3268. data/tests/wellformed/sanitize/item_description_script_map_summary.xml +11 -0
  3269. data/tests/wellformed/sanitize/item_description_style.xml +11 -0
  3270. data/tests/wellformed/sanitize/item_fullitem_applet.xml +11 -0
  3271. data/tests/wellformed/sanitize/item_fullitem_blink.xml +11 -0
  3272. data/tests/wellformed/sanitize/item_fullitem_crazy.xml +77 -0
  3273. data/tests/wellformed/sanitize/item_fullitem_embed.xml +11 -0
  3274. data/tests/wellformed/sanitize/item_fullitem_frame.xml +11 -0
  3275. data/tests/wellformed/sanitize/item_fullitem_iframe.xml +11 -0
  3276. data/tests/wellformed/sanitize/item_fullitem_link.xml +11 -0
  3277. data/tests/wellformed/sanitize/item_fullitem_meta.xml +11 -0
  3278. data/tests/wellformed/sanitize/item_fullitem_object.xml +11 -0
  3279. data/tests/wellformed/sanitize/item_fullitem_onabort.xml +11 -0
  3280. data/tests/wellformed/sanitize/item_fullitem_onblur.xml +11 -0
  3281. data/tests/wellformed/sanitize/item_fullitem_onchange.xml +11 -0
  3282. data/tests/wellformed/sanitize/item_fullitem_onclick.xml +11 -0
  3283. data/tests/wellformed/sanitize/item_fullitem_ondblclick.xml +11 -0
  3284. data/tests/wellformed/sanitize/item_fullitem_onerror.xml +11 -0
  3285. data/tests/wellformed/sanitize/item_fullitem_onfocus.xml +11 -0
  3286. data/tests/wellformed/sanitize/item_fullitem_onkeydown.xml +11 -0
  3287. data/tests/wellformed/sanitize/item_fullitem_onkeypress.xml +11 -0
  3288. data/tests/wellformed/sanitize/item_fullitem_onkeyup.xml +11 -0
  3289. data/tests/wellformed/sanitize/item_fullitem_onload.xml +11 -0
  3290. data/tests/wellformed/sanitize/item_fullitem_onmousedown.xml +11 -0
  3291. data/tests/wellformed/sanitize/item_fullitem_onmouseout.xml +11 -0
  3292. data/tests/wellformed/sanitize/item_fullitem_onmouseover.xml +11 -0
  3293. data/tests/wellformed/sanitize/item_fullitem_onmouseup.xml +11 -0
  3294. data/tests/wellformed/sanitize/item_fullitem_onreset.xml +11 -0
  3295. data/tests/wellformed/sanitize/item_fullitem_onresize.xml +11 -0
  3296. data/tests/wellformed/sanitize/item_fullitem_onsubmit.xml +11 -0
  3297. data/tests/wellformed/sanitize/item_fullitem_onunload.xml +11 -0
  3298. data/tests/wellformed/sanitize/item_fullitem_script.xml +11 -0
  3299. data/tests/wellformed/sanitize/item_fullitem_script_cdata.xml +11 -0
  3300. data/tests/wellformed/sanitize/item_fullitem_script_map_summary.xml +11 -0
  3301. data/tests/wellformed/sanitize/item_fullitem_style.xml +11 -0
  3302. data/tests/wellformed/sanitize/item_xhtml_body_applet.xml +11 -0
  3303. data/tests/wellformed/sanitize/item_xhtml_body_blink.xml +11 -0
  3304. data/tests/wellformed/sanitize/item_xhtml_body_embed.xml +11 -0
  3305. data/tests/wellformed/sanitize/item_xhtml_body_frame.xml +11 -0
  3306. data/tests/wellformed/sanitize/item_xhtml_body_iframe.xml +11 -0
  3307. data/tests/wellformed/sanitize/item_xhtml_body_link.xml +11 -0
  3308. data/tests/wellformed/sanitize/item_xhtml_body_meta.xml +11 -0
  3309. data/tests/wellformed/sanitize/item_xhtml_body_object.xml +11 -0
  3310. data/tests/wellformed/sanitize/item_xhtml_body_onabort.xml +11 -0
  3311. data/tests/wellformed/sanitize/item_xhtml_body_onblur.xml +11 -0
  3312. data/tests/wellformed/sanitize/item_xhtml_body_onchange.xml +11 -0
  3313. data/tests/wellformed/sanitize/item_xhtml_body_onclick.xml +11 -0
  3314. data/tests/wellformed/sanitize/item_xhtml_body_ondblclick.xml +11 -0
  3315. data/tests/wellformed/sanitize/item_xhtml_body_onerror.xml +11 -0
  3316. data/tests/wellformed/sanitize/item_xhtml_body_onfocus.xml +11 -0
  3317. data/tests/wellformed/sanitize/item_xhtml_body_onkeydown.xml +11 -0
  3318. data/tests/wellformed/sanitize/item_xhtml_body_onkeypress.xml +11 -0
  3319. data/tests/wellformed/sanitize/item_xhtml_body_onkeyup.xml +11 -0
  3320. data/tests/wellformed/sanitize/item_xhtml_body_onload.xml +11 -0
  3321. data/tests/wellformed/sanitize/item_xhtml_body_onmousedown.xml +11 -0
  3322. data/tests/wellformed/sanitize/item_xhtml_body_onmouseout.xml +11 -0
  3323. data/tests/wellformed/sanitize/item_xhtml_body_onmouseover.xml +11 -0
  3324. data/tests/wellformed/sanitize/item_xhtml_body_onmouseup.xml +11 -0
  3325. data/tests/wellformed/sanitize/item_xhtml_body_onreset.xml +11 -0
  3326. data/tests/wellformed/sanitize/item_xhtml_body_onresize.xml +11 -0
  3327. data/tests/wellformed/sanitize/item_xhtml_body_onsubmit.xml +11 -0
  3328. data/tests/wellformed/sanitize/item_xhtml_body_onunload.xml +11 -0
  3329. data/tests/wellformed/sanitize/item_xhtml_body_script.xml +11 -0
  3330. data/tests/wellformed/sanitize/item_xhtml_body_script_map_content.xml +11 -0
  3331. data/tests/wellformed/sanitize/item_xhtml_body_style.xml +11 -0
  3332. metadata +3472 -0
data/LICENSE ADDED
@@ -0,0 +1,68 @@
1
+ I include this license in good faith effort, and it should be considered the license for the code herein.
2
+ - Jeff Hodges < jeff at somethingsimilar.com >
3
+ --
4
+ Universal Feed Parser (feedparser.py), its testing harness (feedparsertest.py),
5
+ and its unit tests (everything in the tests/ directory) are released under the
6
+ following license:
7
+
8
+ ----- begin license block -----
9
+
10
+ Copyright (c) 2002-2005, Mark Pilgrim
11
+ All rights reserved.
12
+
13
+ Redistribution and use in source and binary forms, with or without modification,
14
+ are permitted provided that the following conditions are met:
15
+
16
+ * Redistributions of source code must retain the above copyright notice,
17
+ this list of conditions and the following disclaimer.
18
+ * Redistributions in binary form must reproduce the above copyright notice,
19
+ this list of conditions and the following disclaimer in the documentation
20
+ and/or other materials provided with the distribution.
21
+
22
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
23
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32
+ POSSIBILITY OF SUCH DAMAGE.
33
+
34
+ ----- end license block -----
35
+
36
+
37
+
38
+
39
+
40
+ Universal Feed Parser documentation (everything in the docs/ directory) is
41
+ released under the following license:
42
+
43
+ ----- begin license block -----
44
+
45
+ Copyright 2004-2005 Mark Pilgrim. All rights reserved.
46
+
47
+ Redistribution and use in source (XML DocBook) and "compiled" forms (SGML,
48
+ HTML, PDF, PostScript, RTF and so forth) with or without modification, are
49
+ permitted provided that the following conditions are met:
50
+
51
+ * Redistributions of source code (XML DocBook) must retain the above copyright
52
+ notice, this list of conditions and the following disclaimer.
53
+ * Redistributions in compiled form (transformed to other DTDs, converted to
54
+ PDF, PostScript, RTF and other formats) must reproduce the above copyright
55
+ notice, this list of conditions and the following disclaimer in the
56
+ documentation and/or other materials provided with the distribution.
57
+
58
+ THIS DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
59
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
62
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
63
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
64
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
65
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
66
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
67
+ ARISING IN ANY WAY OUT OF THE USE OF THIS DOCUMENTATION, EVEN IF ADVISED OF THE
68
+ POSSIBILITY OF SUCH DAMAGE.
data/README ADDED
@@ -0,0 +1,28 @@
1
+ Universal Feed Parser
2
+ Parse RSS and Atom feeds in Python. 3000 unit tests. Open source.
3
+
4
+ Copyright (c) 2002-5 by Mark Pilgrim
5
+ open source, see LICENSE file for details
6
+
7
+ -----
8
+
9
+ To use:
10
+ If installed as a gem
11
+ require 'rubygems'
12
+ gem 'rfeedparser'
13
+ require 'feedparser'
14
+
15
+ fp = FeedParser.parse("some-feed-filepath-or-url")
16
+
17
+ If not installed as a gem, copy the contents of lib into the ruby path and just use
18
+ require 'feedparser'
19
+
20
+ fp = FeedParser.parse("some-feed-filepath-or-url")
21
+
22
+ ----
23
+
24
+ For developers:
25
+ I currently have the "rough" code in a bzr branch over at
26
+ <http://somethingsimilar.com/code/bzr/rfeedparser/>. You'll want
27
+ to check out the rfeedparser-main branch for the current code, or
28
+ rfeedparser-release for the code in the latest release.
@@ -0,0 +1,60 @@
1
+ === Testing rFeedParser ===
2
+ Simply run `ruby feedparsertest.rb` to run all of the FeedParser tests.
3
+ Optionally, you can start up feedparserserver.rb and run feedparser.rb
4
+ against "http://localhost:8097/tests/path/to/testcase.xml" if you want
5
+ to try a test individually. I'll probably merge feedparserserver.rb into
6
+ feedparsertest.rb soon.
7
+
8
+ === Last Count 20070321 ===
9
+ By my last count, feedparsertest.rb says that there are 45 assertions
10
+ that fail, and 4 that error out. I've included here a few tests that
11
+ "Failed, Sort Of". By that I mean, the behaviors the tests are meant to
12
+ check are correct, but the test fails because of some other superficial
13
+ or unrelated behavior.
14
+
15
+ === Tests Failed, Sort Of ===
16
+
17
+ Problem:
18
+ Hpricot adds end tags when it sees an unclosed tag. This means that
19
+ certain tests that rely on feedparser.py's _HTMLSanitizer not closing
20
+ tags will fail. Many of the tests affected (actually, all the ones
21
+ affected, AFAICT) would otherwise have passed.
22
+
23
+ Tests Affected:
24
+ * tests/wellformed/rss/item_description_not_a_doctype.xml (extraneous trailing </a>)
25
+ * tests/illformed/rss/item_description_not_a_doctype.xml (ditto)
26
+ ==
27
+ Problem:
28
+ The Hpricot#scrub method I've written does not remove the dangerous
29
+ markup in the same way feedparser.py does, but the output is still safe.
30
+
31
+ Tests Affected:
32
+ * tests/wellformed/sanitize/entry_content_crazy.xml
33
+ * tests/wellformed/sanitize/entry_summary_crazy.xml
34
+ * tests/wellformed/sanitize/entry_title_crazy.xml
35
+ * tests/wellformed/sanitize/feed_copyright_crazy.xml
36
+ * tests/wellformed/sanitize/feed_info_crazy.xml
37
+ * tests/wellformed/sanitize/feed_subtitle_crazy.xml
38
+ * tests/wellformed/sanitize/feed_tagline_crazy.xml
39
+ * tests/wellformed/sanitize/feed_title_crazy.xml
40
+ * tests/wellformed/sanitize/item_content_encoded_crazy.xml
41
+ * tests/wellformed/sanitize/item_description_crazy.xml
42
+ * tests/wellformed/sanitize/item_fullitem_crazy.xml
43
+ * tests/illformed/sanitize/entry_content_crazy.xml
44
+ * tests/illformed/sanitize/entry_summary_crazy.xml
45
+ * tests/illformed/sanitize/entry_title_crazy.xml
46
+ * tests/illformed/sanitize/feed_copyright_crazy.xml
47
+ * tests/illformed/sanitize/feed_info_crazy.xml
48
+ * tests/illformed/sanitize/feed_subtitle_crazy.xml
49
+ * tests/illformed/sanitize/feed_tagline_crazy.xml
50
+ * tests/illformed/sanitize/feed_title_crazy.xml
51
+ * tests/illformed/sanitize/item_content_encoded_crazy.xml
52
+ * tests/illformed/sanitize/item_description_crazy.xml
53
+ * tests/illformed/sanitize/item_fullitem_crazy.xml
54
+ ==
55
+
56
+ Problem:
57
+ My current system lacks a few encodings that rfeedparser and Iconv need.
58
+ This results in failures that will probably not occur on other machines.
59
+
60
+ Tests Affected:
@@ -0,0 +1,3671 @@
1
+ #!/usr/bin/env ruby
2
+ """Universal feed parser in Ruby
3
+
4
+ Handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds
5
+
6
+ Visit http://feedparser.org/ for the latest version in Python
7
+ Visit http://feedparser.org/docs/ for the latest documentation
8
+ Email Jeff Hodges at jeff@obquo.com for questions
9
+
10
+ Required: Ruby 1.8
11
+ """
12
+ $KCODE = 'UTF8'
13
+ require 'stringio'
14
+ require 'uri'
15
+ require 'cgi' # escaping html
16
+ require 'time'
17
+ require 'xml/saxdriver' # calling expat
18
+ require 'pp'
19
+ require 'rubygems'
20
+ require 'base64'
21
+ require 'iconv'
22
+ begin
23
+ gem 'hpricot', ">=0.5"
24
+ gem 'character-encodings', ">=0.2.0"
25
+ gem 'htmltools'
26
+ gem 'htmlentities'
27
+ gem 'activesupport'
28
+ gem 'rchardet'
29
+ rescue Gem::LoadError,LoadError
30
+ end
31
+
32
+ require 'chardet'
33
+ $chardet = true
34
+
35
+ require 'hpricot'
36
+ require 'encoding/character/utf-8'
37
+ require 'html/sgml-parser'
38
+ require 'htmlentities'
39
+ require 'active_support'
40
+ require 'open-uri'
41
+ include OpenURI
42
+
43
+ $debug = false
44
+ $compatible = true
45
+
46
# Maps alternative spellings of character-encoding names to one canonical
# name. Adapted from python2.4's encodings/aliases.py.
#
# Keys are looked up verbatim (no case folding or normalisation happens in
# this table), so callers are presumably expected to normalise the name
# first -- TODO confirm against the call sites.
#
# Corrections relative to the previous revision of this table:
# * 'iso2002_jp_1' / 'iso2002_jp_3' keys were typos for 'iso2022_jp_1' /
#   'iso2022_jp_3'.
# * 'iso2022_jp_2' mapped to the misspelled 'iso-2002-jp-2'.
# * 'iso8849_4' was a typo for 'iso8859_4'.
# * the iso8859_8 section started with 'iso8859_9' => 'iso8859_8', which
#   left 'iso8859_8' unmapped and duplicated the 'iso8859_9' key.
Encoding_Aliases = {
  # ascii codec
  '646' => 'ascii',
  'ansi_x3.4_1968' => 'ascii',
  'ansi_x3_4_1968' => 'ascii', # some email headers use this non-standard name
  'ansi_x3.4_1986' => 'ascii',
  'cp367' => 'ascii',
  'csascii' => 'ascii',
  'ibm367' => 'ascii',
  'iso646_us' => 'ascii',
  'iso_646.irv_1991' => 'ascii',
  'iso_ir_6' => 'ascii',
  'us' => 'ascii',
  'us_ascii' => 'ascii',

  # big5 codec
  'big5_tw' => 'big5',
  'csbig5' => 'big5',

  # big5hkscs codec
  'big5_hkscs' => 'big5hkscs',
  'hkscs' => 'big5hkscs',

  # cp037 codec
  '037' => 'cp037',
  'csibm037' => 'cp037',
  'ebcdic_cp_ca' => 'cp037',
  'ebcdic_cp_nl' => 'cp037',
  'ebcdic_cp_us' => 'cp037',
  'ebcdic_cp_wt' => 'cp037',
  'ibm037' => 'cp037',
  'ibm039' => 'cp037',

  # cp1026 codec
  '1026' => 'cp1026',
  'csibm1026' => 'cp1026',
  'ibm1026' => 'cp1026',

  # cp1140 codec
  '1140' => 'cp1140',
  'ibm1140' => 'cp1140',

  # cp1250 codec
  '1250' => 'cp1250',
  'windows_1250' => 'cp1250',

  # cp1251 codec
  '1251' => 'cp1251',
  'windows_1251' => 'cp1251',

  # cp1252 codec
  '1252' => 'cp1252',
  'windows_1252' => 'cp1252',

  # cp1253 codec
  '1253' => 'cp1253',
  'windows_1253' => 'cp1253',

  # cp1254 codec
  '1254' => 'cp1254',
  'windows_1254' => 'cp1254',

  # cp1255 codec
  '1255' => 'cp1255',
  'windows_1255' => 'cp1255',

  # cp1256 codec
  '1256' => 'cp1256',
  'windows_1256' => 'cp1256',

  # cp1257 codec
  '1257' => 'cp1257',
  'windows_1257' => 'cp1257',

  # cp1258 codec
  '1258' => 'cp1258',
  'windows_1258' => 'cp1258',

  # cp424 codec
  '424' => 'cp424',
  'csibm424' => 'cp424',
  'ebcdic_cp_he' => 'cp424',
  'ibm424' => 'cp424',

  # cp437 codec
  '437' => 'cp437',
  'cspc8codepage437' => 'cp437',
  'ibm437' => 'cp437',

  # cp500 codec
  '500' => 'cp500',
  'csibm500' => 'cp500',
  'ebcdic_cp_be' => 'cp500',
  'ebcdic_cp_ch' => 'cp500',
  'ibm500' => 'cp500',

  # cp775 codec
  '775' => 'cp775',
  'cspc775baltic' => 'cp775',
  'ibm775' => 'cp775',

  # cp850 codec
  '850' => 'cp850',
  'cspc850multilingual' => 'cp850',
  'ibm850' => 'cp850',

  # cp852 codec
  '852' => 'cp852',
  'cspcp852' => 'cp852',
  'ibm852' => 'cp852',

  # cp855 codec
  '855' => 'cp855',
  'csibm855' => 'cp855',
  'ibm855' => 'cp855',

  # cp857 codec
  '857' => 'cp857',
  'csibm857' => 'cp857',
  'ibm857' => 'cp857',

  # cp860 codec
  '860' => 'cp860',
  'csibm860' => 'cp860',
  'ibm860' => 'cp860',

  # cp861 codec
  '861' => 'cp861',
  'cp_is' => 'cp861',
  'csibm861' => 'cp861',
  'ibm861' => 'cp861',

  # cp862 codec
  '862' => 'cp862',
  'cspc862latinhebrew' => 'cp862',
  'ibm862' => 'cp862',

  # cp863 codec
  '863' => 'cp863',
  'csibm863' => 'cp863',
  'ibm863' => 'cp863',

  # cp864 codec
  '864' => 'cp864',
  'csibm864' => 'cp864',
  'ibm864' => 'cp864',

  # cp865 codec
  '865' => 'cp865',
  'csibm865' => 'cp865',
  'ibm865' => 'cp865',

  # cp866 codec
  '866' => 'cp866',
  'csibm866' => 'cp866',
  'ibm866' => 'cp866',

  # cp869 codec
  '869' => 'cp869',
  'cp_gr' => 'cp869',
  'csibm869' => 'cp869',
  'ibm869' => 'cp869',

  # cp932 codec
  '932' => 'cp932',
  'ms932' => 'cp932',
  'mskanji' => 'cp932',
  'ms_kanji' => 'cp932',

  # cp949 codec
  '949' => 'cp949',
  'ms949' => 'cp949',
  'uhc' => 'cp949',

  # cp950 codec
  '950' => 'cp950',
  'ms950' => 'cp950',

  # euc_jp codec
  'euc_jp' => 'euc-jp',
  'eucjp' => 'euc-jp',
  'ujis' => 'euc-jp',
  'u_jis' => 'euc-jp',

  # euc_kr codec
  'euc_kr' => 'euc-kr',
  'euckr' => 'euc-kr',
  'korean' => 'euc-kr',
  'ksc5601' => 'euc-kr',
  'ks_c_5601' => 'euc-kr',
  'ks_c_5601_1987' => 'euc-kr',
  'ksx1001' => 'euc-kr',
  'ks_x_1001' => 'euc-kr',

  # gb18030 codec
  'gb18030_2000' => 'gb18030',

  # gb2312 codec
  'chinese' => 'gb2312',
  'csiso58gb231280' => 'gb2312',
  'euc_cn' => 'gb2312',
  'euccn' => 'gb2312',
  'eucgb2312_cn' => 'gb2312',
  'gb2312_1980' => 'gb2312',
  'gb2312_80' => 'gb2312',
  'iso_ir_58' => 'gb2312',

  # gbk codec
  '936' => 'gbk',
  'cp936' => 'gbk',
  'ms936' => 'gbk',

  # hp-roman8 codec
  'hp_roman8' => 'hp-roman8',
  'roman8' => 'hp-roman8',
  'r8' => 'hp-roman8',
  'csHPRoman8' => 'hp-roman8',

  # iso2022_jp codec
  'iso2022_jp' => 'iso-2022-jp',
  'csiso2022jp' => 'iso-2022-jp',
  'iso2022jp' => 'iso-2022-jp',
  'iso_2022_jp' => 'iso-2022-jp',

  # iso2022_jp_1 codec
  'iso2022_jp_1' => 'iso-2022-jp-1',
  'iso2022jp_1' => 'iso-2022-jp-1',
  'iso_2022_jp_1' => 'iso-2022-jp-1',

  # iso2022_jp_2 codec
  'iso2022_jp_2' => 'iso-2022-jp-2',
  'iso2022jp_2' => 'iso-2022-jp-2',
  'iso_2022_jp_2' => 'iso-2022-jp-2',

  # iso2022_jp_3 codec
  'iso2022_jp_3' => 'iso-2022-jp-3',
  'iso2022jp_3' => 'iso-2022-jp-3',
  'iso_2022_jp_3' => 'iso-2022-jp-3',

  # iso2022_kr codec
  'iso2022_kr' => 'iso-2022-kr',
  'csiso2022kr' => 'iso-2022-kr',
  'iso2022kr' => 'iso-2022-kr',
  'iso_2022_kr' => 'iso-2022-kr',

  # iso8859_10 codec
  'iso8859_10' => 'iso-8859-10',
  'csisolatin6' => 'iso-8859-10',
  'iso_8859_10' => 'iso-8859-10',
  'iso_8859_10_1992' => 'iso-8859-10',
  'iso_ir_157' => 'iso-8859-10',
  'l6' => 'iso-8859-10',
  'latin6' => 'iso-8859-10',

  # iso8859_13 codec
  'iso8859_13' => 'iso-8859-13',
  'iso_8859_13' => 'iso-8859-13',

  # iso8859_14 codec
  'iso8859_14' => 'iso-8859-14',
  'iso_8859_14' => 'iso-8859-14',
  'iso_8859_14_1998' => 'iso-8859-14',
  'iso_celtic' => 'iso-8859-14',
  'iso_ir_199' => 'iso-8859-14',
  'l8' => 'iso-8859-14',
  'latin8' => 'iso-8859-14',

  # iso8859_15 codec
  'iso8859_15' => 'iso-8859-15',
  'iso_8859_15' => 'iso-8859-15',

  # iso8859_1 codec
  'latin_1' => 'iso-8859-1',
  'cp819' => 'iso-8859-1',
  'csisolatin1' => 'iso-8859-1',
  'ibm819' => 'iso-8859-1',
  'iso8859' => 'iso-8859-1',
  'iso_8859_1' => 'iso-8859-1',
  'iso_8859_1_1987' => 'iso-8859-1',
  'iso_ir_100' => 'iso-8859-1',
  'l1' => 'iso-8859-1',
  'latin' => 'iso-8859-1',
  'latin1' => 'iso-8859-1',

  # iso8859_2 codec
  'iso8859_2' => 'iso-8859-2',
  'csisolatin2' => 'iso-8859-2',
  'iso_8859_2' => 'iso-8859-2',
  'iso_8859_2_1987' => 'iso-8859-2',
  'iso_ir_101' => 'iso-8859-2',
  'l2' => 'iso-8859-2',
  'latin2' => 'iso-8859-2',

  # iso8859_3 codec
  'iso8859_3' => 'iso-8859-3',
  'csisolatin3' => 'iso-8859-3',
  'iso_8859_3' => 'iso-8859-3',
  'iso_8859_3_1988' => 'iso-8859-3',
  'iso_ir_109' => 'iso-8859-3',
  'l3' => 'iso-8859-3',
  'latin3' => 'iso-8859-3',

  # iso8859_4 codec
  'iso8859_4' => 'iso-8859-4',
  'csisolatin4' => 'iso-8859-4',
  'iso_8859_4' => 'iso-8859-4',
  'iso_8859_4_1988' => 'iso-8859-4',
  'iso_ir_110' => 'iso-8859-4',
  'l4' => 'iso-8859-4',
  'latin4' => 'iso-8859-4',

  # iso8859_5 codec
  'iso8859_5' => 'iso-8859-5',
  'csisolatincyrillic' => 'iso-8859-5',
  'cyrillic' => 'iso-8859-5',
  'iso_8859_5' => 'iso-8859-5',
  'iso_8859_5_1988' => 'iso-8859-5',
  'iso_ir_144' => 'iso-8859-5',

  # iso8859_6 codec
  'iso8859_6' => 'iso-8859-6',
  'arabic' => 'iso-8859-6',
  'asmo_708' => 'iso-8859-6',
  'csisolatinarabic' => 'iso-8859-6',
  'ecma_114' => 'iso-8859-6',
  'iso_8859_6' => 'iso-8859-6',
  'iso_8859_6_1987' => 'iso-8859-6',
  'iso_ir_127' => 'iso-8859-6',

  # iso8859_7 codec
  'iso8859_7' => 'iso-8859-7',
  'csisolatingreek' => 'iso-8859-7',
  'ecma_118' => 'iso-8859-7',
  'elot_928' => 'iso-8859-7',
  'greek' => 'iso-8859-7',
  'greek8' => 'iso-8859-7',
  'iso_8859_7' => 'iso-8859-7',
  'iso_8859_7_1987' => 'iso-8859-7',
  'iso_ir_126' => 'iso-8859-7',

  # iso8859_8 codec
  'iso8859_8' => 'iso-8859-8',
  'csisolatinhebrew' => 'iso-8859-8',
  'hebrew' => 'iso-8859-8',
  'iso_8859_8' => 'iso-8859-8',
  'iso_8859_8_1988' => 'iso-8859-8',
  'iso_ir_138' => 'iso-8859-8',

  # iso8859_9 codec
  'iso8859_9' => 'iso-8859-9',
  'csisolatin5' => 'iso-8859-9',
  'iso_8859_9' => 'iso-8859-9',
  'iso_8859_9_1989' => 'iso-8859-9',
  'iso_ir_148' => 'iso-8859-9',
  'l5' => 'iso-8859-9',
  'latin5' => 'iso-8859-9',

  # iso8859_11 codec
  'iso8859_11' => 'iso-8859-11',
  'thai' => 'iso-8859-11',
  'iso_8859_11' => 'iso-8859-11',
  'iso_8859_11_2001' => 'iso-8859-11',

  # iso8859_16 codec
  'iso8859_16' => 'iso-8859-16',
  'iso_8859_16' => 'iso-8859-16',
  'iso_8859_16_2001' => 'iso-8859-16',
  'iso_ir_226' => 'iso-8859-16',
  'l10' => 'iso-8859-16',
  'latin10' => 'iso-8859-16',

  # cskoi8r codec
  'koi8_r' => 'cskoi8r',

  # mac_cyrillic codec
  'mac_cyrillic' => 'maccyrillic',

  # shift_jis codec
  'csshiftjis' => 'shift_jis',
  'shiftjis' => 'shift_jis',
  'sjis' => 'shift_jis',
  's_jis' => 'shift_jis',

  # shift_jisx0213 codec
  'shiftjisx0213' => 'shift_jisx0213',
  'sjisx0213' => 'shift_jisx0213',
  's_jisx0213' => 'shift_jisx0213',

  # utf_16 codec
  'utf_16' => 'utf-16',
  'u16' => 'utf-16',
  'utf16' => 'utf-16',

  # utf_16_be codec
  'utf_16_be' => 'utf-16be',
  'unicodebigunmarked' => 'utf-16be',
  'utf_16be' => 'utf-16be',

  # utf_16_le codec
  'utf_16_le' => 'utf-16le',
  'unicodelittleunmarked' => 'utf-16le',
  'utf_16le' => 'utf-16le',

  # utf_7 codec
  'utf_7' => 'utf-7',
  'u7' => 'utf-7',
  'utf7' => 'utf-7',

  # utf_8 codec
  'utf_8' => 'utf-8',
  'u8' => 'utf-8',
  'utf' => 'utf-8',
  'utf8' => 'utf-8',
  'utf8_ucs2' => 'utf-8',
  'utf8_ucs4' => 'utf-8',
}
462
+
463
# Converts a single string from +from_encoding+ to the encoding named
# 'unicode', delegating the actual work to uconvert.
def unicode(data, from_encoding)
  target_encoding = 'unicode'
  uconvert(data, from_encoding, target_encoding)
end
468
+
469
# Converts +data+ from +from_encoding+ to +to_encoding+ (UTF-8 by default)
# with Iconv. Both names are first resolved through Encoding_Aliases; a name
# with no alias entry is passed to Iconv unchanged.
# Returns the first element of the array Iconv.iconv produces.
def uconvert(data, from_encoding, to_encoding = 'utf-8')
  source, target = [from_encoding, to_encoding].map do |name|
    Encoding_Aliases[name] || name
  end
  Iconv.iconv(target, source, data).first
end
474
+
475
# Returns the UTF-8 encoded string for the single code point +i+.
def unichr(i)
  codepoints = [i]
  codepoints.pack('U')
end
478
+
479
# Finds the first occurrence of +regexp+ in +stri+ at or after +offset+.
#
# Returns a two-element result: the index where the match starts and the
# MatchData obtained by matching +regexp+ against the substring that begins
# at that index. Returns [nil, nil] when there is no match.
#
# (An empty `if offset == 241 ... end` debugging leftover was removed; it
# had no effect.)
def index_match(stri, regexp, offset)
  position = stri.index(regexp, offset)
  return nil, nil unless position

  return position, stri[position..-1].match(regexp)
end
489
+
490
# Re-encodes +s+ from "ebcdic-cp-be" to "iso88591" with Iconv and returns
# the first element of the converted result.
def _ebcdic_to_ascii(s)
  converted = Iconv.iconv("iso88591", "ebcdic-cp-be", s)
  return converted[0]
end
493
+
494
# Resolves +uri+ against +base+ and returns the result as a String.
#
# Before joining, any surplus slashes directly after "scheme://" are
# dropped (e.g. "http:////host/x" becomes "http://host/x").
#
# If URI.join raises URI::BadURIError and +base+ is a relative reference,
# +uri+ alone is parsed and returned. If +base+ is not relative in that
# situation the method returns nil. Other URI errors are not rescued here.
def urljoin(base, uri)
  # The hyphen is escaped so the class lists '+', '-' and '.' literally; an
  # unescaped "+-." is a range that would also admit ','.
  urifixer = /^([A-Za-z][A-Za-z0-9+\-.]*:\/\/)(\/*)(.*?)/u
  uri = uri.sub(urifixer, '\1\3')
  begin
    URI.join(base, uri).to_s
  rescue URI::BadURIError
    URI.parse(uri).to_s if URI.parse(base).relative?
  end
end
505
+
506
# Builds a UTC Time from the first six entries of +pytuple+, which are
# passed to Time.utc in order (year, month, day, hour, minute, second).
# Any further entries are ignored.
def py2rtime(pytuple)
  # Splat the slice: Time.utc takes the fields as separate arguments, not
  # as one Array.
  Time.utc(*pytuple[0..5])
end
509
+
510
+ # http://intertwingly.net/stories/2005/09/28/xchar.rb
511
module XChar
  # Windows-1252 byte values mapped to the Unicode code points they stand for.
  # http://intertwingly.net/stories/2004/04/14/i18n.html#CleaningWindows
  CP1252 = {
    0x80 => 0x20AC, # euro sign
    0x82 => 0x201A, # single low-9 quotation mark
    0x83 => 0x0192, # latin small letter f with hook
    0x84 => 0x201E, # double low-9 quotation mark
    0x85 => 0x2026, # horizontal ellipsis
    0x86 => 0x2020, # dagger
    0x87 => 0x2021, # double dagger
    0x88 => 0x02C6, # modifier letter circumflex accent
    0x89 => 0x2030, # per mille sign
    0x8A => 0x0160, # latin capital letter s with caron
    0x8B => 0x2039, # single left-pointing angle quotation mark
    0x8C => 0x0152, # latin capital ligature oe
    0x8E => 0x017D, # latin capital letter z with caron
    0x91 => 0x2018, # left single quotation mark
    0x92 => 0x2019, # right single quotation mark
    0x93 => 0x201C, # left double quotation mark
    0x94 => 0x201D, # right double quotation mark
    0x95 => 0x2022, # bullet
    0x96 => 0x2013, # en dash
    0x97 => 0x2014, # em dash
    0x98 => 0x02DC, # small tilde
    0x99 => 0x2122, # trade mark sign
    0x9A => 0x0161, # latin small letter s with caron
    0x9B => 0x203A, # single right-pointing angle quotation mark
    0x9C => 0x0153, # latin small ligature oe
    0x9E => 0x017E, # latin small letter z with caron
    0x9F => 0x0178  # latin capital letter y with diaeresis
  }

  # Code points that must be written as entities in XML character data.
  # http://www.w3.org/TR/REC-xml/#dt-chardata
  PREDEFINED = {
    0x26 => '&amp;', # ampersand
    0x3C => '&lt;',  # left angle bracket
    0x3E => '&gt;'   # right angle bracket
  }

  # Code points permitted in an XML document: three individual control
  # characters followed by three ranges.
  # http://www.w3.org/TR/REC-xml/#charsets
  VALID = [
    [0x9, 0xA, 0xD],
    (0x20..0xD7FF),
    (0xE000..0xFFFD),
    (0x10000..0x10FFFF)
  ]
end
552
+
553
# Defined on Integer rather than Fixnum: Integer is Fixnum's parent class on
# old Rubies and the only integer class on new ones (the Fixnum constant was
# removed in Ruby 3.2), so integer receivers get #xchr on every version.
class Integer
  # XML-escaped version of chr.
  #
  # Treats the receiver as a code point and returns a String:
  # * values listed in XChar::CP1252 are first replaced by their mapped
  #   code point;
  # * anything not covered by XChar::VALID becomes 42 ('*');
  # * code points in XChar::PREDEFINED yield their entity;
  # * other code points below 128 yield the plain character, and the rest
  #   a decimal numeric character reference such as "&#8364;".
  def xchr
    n = XChar::CP1252[self] || self
    n = 42 unless XChar::VALID.find { |range| range.include? n }
    XChar::PREDEFINED[n] || (n < 128 ? n.chr : "&##{n};")
  end
end
561
+
562
class String
  # Keeps the stock String#index reachable under the name old_index.
  alias :old_index :index

  # Returns an XML-safe copy of the string by joining the #xchr form of
  # every code point. The string is decoded as UTF-8 first; if anything in
  # that attempt raises, it is re-read byte by byte instead
  # (ISO-8859-1 / WIN-1252 input).
  def to_xs
    begin
      codepoints = unpack('U*') # ASCII, UTF-8
      codepoints.map { |codepoint| codepoint.xchr }.join
    rescue
      bytes = unpack('C*') # ISO-8859-1, WIN-1252
      bytes.map { |byte| byte.xchr }.join
    end
  end
end
570
+
571
# Raised by BetterSGMLParser#error.
# NOTE(review): this derives from Exception, so a bare `rescue` will not
# catch it. Left unchanged because callers may rely on that.
class BetterSGMLParserError < Exception; end;

# HTML::SGMLParser subclass whose scanning loop and tag/comment/PI parsers
# are re-implemented on top of the regular expressions below.
class BetterSGMLParser < HTML::SGMLParser
  # Replaced Tagfind and Charref Regexps with the ones in feedparser.py
  # This makes things work.
  Interesting = /[&<]/u
  Incomplete = Regexp.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' +
                              '<([a-zA-Z][^<>]*|/([a-zA-Z][^<>]*)?|' +
                              '![^<>]*)?', 64) # 64 is the unicode flag

  Entityref = /&([a-zA-Z][-.a-zA-Z0-9]*)[^-.a-zA-Z0-9]/u
  Charref = /&#(x?[0-9A-Fa-f]+)[^0-9A-Fa-f]/u

  # NOTE(review): both short-tag patterns begin with a literal single quote,
  # so they only match text of the form '<tag...  The quote looks like a
  # leftover from a quoted pattern string, and it effectively disables the
  # short-tag branch of parse_starttag. Confirm before changing: that branch
  # returns the match *start* index, which would not advance the scanner.
  Shorttagopen = /'<[a-zA-Z][-.a-zA-Z0-9]*/u
  Shorttag = /'<([a-zA-Z][-.a-zA-Z0-9]*)\/([^\/]*)\//u
  Endtagopen = /<\//u # Matching the Python SGMLParser
  Endbracket = /[<>]/u
  Declopen = /<!/u
  Piopenbegin = /^<\?/u
  Piclose = />/u

  Commentopen = /<!--/u
  Commentclose = /--\s*>/u
  Tagfind = /[a-zA-Z][-_.:a-zA-Z0-9]*/u
  Attrfind = Regexp.compile('\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'+
    '(\'[^\']*\'|"[^"]*"|[\]\[\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?',
    64)
  Endtagfind = /\s*\/\s*>/u

  def initialize(verbose=false)
    super(verbose)
  end

  def feed(*args)
    super(*args)
  end

  # Main scanning loop over @rawdata.
  #
  # Walks the buffer, passing plain text to handle_data and dispatching
  # '<...' and '&...' constructs to the matching parse_* / handle_* method.
  # The loop stops early when a construct is not yet complete. When +_end+
  # is true, whatever is left is flushed through handle_data. Afterwards
  # @rawdata keeps only the unconsumed tail.
  def goahead(_end)
    rawdata = @rawdata # woo, utf-8 magic
    i = 0
    n = rawdata.length
    while i < n
      if @nomoretags
        # handle_data_range does nothing more than set a "Range" that is never used. wtf?
        handle_data(rawdata[i...n]) # i...n means "range from i to n not including n"
        i = n
        break
      end
      # Everything up to the next '&' or '<' is plain data.
      j = rawdata.index(Interesting, i)
      j = n unless j
      handle_data(rawdata[i...j]) if i < j
      i = j
      break if (i == n)
      if rawdata[i..i] == '<'
        # Starttagopen is not defined in this class; presumably it is
        # inherited from HTML::SGMLParser -- TODO confirm.
        if rawdata.index(Starttagopen,i) == i
          if @literal
            handle_data(rawdata[i..i])
            i = i+1
            next
          end
          k = parse_starttag(i)
          break unless k
          i = k
          next
        end
        if rawdata.index(Endtagopen,i) == i
          k = parse_endtag(i)
          break unless k
          i = k
          @literal = false
          next
        end
        if @literal
          if n > (i+1)
            handle_data("<")
            i = i+1
          else
            # incomplete
            break
          end
          next
        end
        if rawdata.index(Commentopen,i) == i
          k = parse_comment(i)
          break unless k
          i = k
          next
        end
        if rawdata.index(Piopenbegin,i) == i # Piopenbegin is anchored with ^
          # parse_pi returns a length, not an index, hence +=.
          k = parse_pi(i)
          break unless k
          i += k
          next
        end
        if rawdata.index(Declopen,i) == i
          # This is some sort of declaration; in "HTML as
          # deployed," this should only be the document type
          # declaration ("<!DOCTYPE html...>").
          k = parse_declaration(i)
          break unless k
          i = k
          next
        end
      elsif rawdata[i..i] == '&'
        if @literal # FIXME BUGME SGMLParser totally does not check this. Bug it.
          handle_data(rawdata[i..i])
          i += 1
          next
        end

        # the Char must come first as its #=~ method is the only one that is UTF-8 safe
        ni,match = index_match(rawdata, Charref, i)
        if ni and ni == i
          handle_charref(match[1]) # first capture group: the reference body
          i += match[0].length     # whole match, including the terminating character
          # The pattern consumes one character after the reference; give it
          # back unless it was the ';' terminator.
          i -= 1 unless rawdata[i-1..i-1] == ";"
          next
        end
        ni,match = index_match(rawdata, Entityref, i)
        if ni and ni == i
          handle_entityref(match[1])
          i += match[0].length
          i -= 1 unless rawdata[i-1..i-1] == ";"
          next
        end
      else
        error('neither < nor & ??')
      end
      # We get here only if incomplete matches but
      # nothing else
      ni,match = index_match(rawdata,Incomplete,i)
      # NOTE(review): this compares against 0 rather than i, so for i > 0 the
      # single character at i is always emitted as data. Comparing with i may
      # have been intended -- confirm that callers flush the parser before
      # changing it, since that would defer trailing text until the final
      # goahead(true).
      unless ni and ni == 0
        handle_data(rawdata[i...i+1]) # str[i...i+1] == str[i..i]
        i += 1
        next
      end
      j = ni + match[0].length
      break if j == n # Really incomplete
      handle_data(rawdata[i...j])
      i = j
    end # end while

    if _end and i < n
      handle_data(rawdata[i...n])
      i = n
    end

    @rawdata = rawdata[i..-1]
    # @offset += i # FIXME BUGME another unused variable in SGMLParser?
  end


  # Internal -- parse a processing instruction starting at index +i+.
  # Passes the text between "<?" and the closing ">" to handle_pi and returns
  # the number of characters consumed, or nil if the instruction is not
  # terminated yet.
  def parse_pi(i)
    rawdata = @rawdata
    if rawdata[i...i+2] != '<?'
      error("unexpected call to parse_pi()")
    end
    ni,match = index_match(rawdata,Piclose,i+2)
    return nil unless match
    j = ni
    handle_pi(rawdata[i+2...j])
    j = (j + match[0].length)
    return j-i
  end

  # Internal -- parse a comment starting at index +i+.
  # Passes the text between "<!--" and the closing marker to handle_comment
  # and returns the index just past the closing marker, or nil if the
  # comment is not terminated yet.
  def parse_comment(i)
    rawdata = @rawdata
    if rawdata[i...i+4] != "<!--"
      error("unexpected call to parse_comment()")
    end
    ni,match = index_match(rawdata, Commentclose,i)
    return nil unless match
    handle_comment(rawdata[i+4..(ni-1)])
    return ni+match[0].length # index just past the closing comment marker
  end


  # Internal -- parse a start tag beginning at index +i+.
  # Collects [name, value] attribute pairs (names lower-cased), records the
  # raw tag text in @_starttag_text and calls finish_starttag. Returns the
  # index to continue scanning from, or nil if the tag is incomplete.
  def parse_starttag(i)
    @_starttag_text = nil
    start_pos = i
    rawdata = @rawdata
    ni,match = index_match(rawdata,Shorttagopen,i)
    if ni == i
      # SGML shorthand: <tag/data/ == <tag>data</tag>
      # XXX Can data contain &... (entity or char refs)?
      # XXX Can data contain < or > (tag characters)?
      # XXX Can there be whitespace before the first /?
      k,match = index_match(rawdata,Shorttag,i)
      return nil unless match
      tag, data = match[1], match[2]
      @_starttag_text = "<#{tag}/"
      tag.downcase!
      second_end = rawdata.index(Shorttagopen,k)
      finish_shorttag(tag, data)
      @_starttag_text = rawdata[start_pos...second_end+1]
      return k
    end

    j = rawdata.index(Endbracket, i+1)
    return nil unless j
    attrsd = []
    if rawdata[i...i+2] == '<>'
      # SGML shorthand: <> == <last open tag seen>
      k = j
      tag = @lasttag
    else
      ni,match = index_match(rawdata,Tagfind,i+1)
      unless match
        error('unexpected call to parse_starttag')
      end
      k = ni+match[0].length+1
      tag = match[0].downcase
      @lasttag = tag
    end

    while k < j
      break if rawdata.index(Endtagfind, k) == k
      ni,match = index_match(rawdata,Attrfind,k)
      break unless ni
      matched_length = match[0].length
      attrname, rest, attrvalue = match[1],match[2],match[3]
      if rest.nil? or rest.empty?
        attrvalue = '' # was: = attrname # Why the change?
      elsif attrvalue[0..0] == attrvalue[-1..-1] and ["'", '"'].include?(attrvalue[0..0])
        # Strip one pair of matching quotes. Both ends are compared as
        # one-character strings so that single- and double-quoted values are
        # handled alike regardless of what a ?x character literal or
        # String#[] with an integer returns on the running Ruby.
        attrvalue = attrvalue[1...-1]
      end
      attrsd << [attrname.downcase, attrvalue]
      k += matched_length
    end
    if rawdata[j..j] == ">"
      j += 1
    end
    @_starttag_text = rawdata[start_pos...j]
    finish_starttag(tag, attrsd)
    return j
  end

  # Internal -- parse an end tag beginning at index +i+.
  # Calls finish_endtag with the stripped, lower-cased tag name and returns
  # the index to continue scanning from, or nil if no '<' or '>' follows.
  def parse_endtag(i)
    rawdata = @rawdata
    j, _match = index_match(rawdata, /[<>]/,i+1)
    return nil unless j
    tag = rawdata[i+2...j].strip.downcase
    if rawdata[j..j] == ">"
      j += 1
    end
    finish_endtag(tag)
    return j
  end

  def output
    # Return processed HTML as a single string
    return @pieces.map{|p| p.to_s}.join
  end

  # Raises BetterSGMLParserError carrying +message+.
  def error(message)
    raise BetterSGMLParserError.new(message)
  end

  # Processing instructions are ignored.
  def handle_pi(text)
  end

  # Declarations are ignored.
  def handle_decl(text)
  end
end
831
+
832
+ # Add some helper methods to make AttributeList (all of those damn attrs
833
+ # and attrsD used by StrictFeedParser) act more like a Hash.
834
+ # NOTE AttributeList is still Read-Only (AFAICT).
835
+ # Monkey patching is terrible, and I have an addiction.
836
module XML
  module SAX
    # Hash-flavoured read helpers layered on the getLength / getName /
    # getValue accessors of an attribute list.
    module AttributeList # in xml/sax.rb
      # Hash-style lookup; forwards +key+ to getValue.
      def [](key)
        getValue(key)
      end

      # Yields a [name, value] pair for every position in the list.
      def each(&blk)
        (0...getLength).each do |position|
          yield [getName(position), getValue(position)]
        end
      end

      # Yields every attribute name in position order.
      def each_key(&blk)
        (0...getLength).each do |position|
          yield getName(position)
        end
      end

      # Yields every attribute value in position order.
      def each_value(&blk)
        (0...getLength).each do |position|
          yield getValue(position)
        end
      end

      # Returns the attributes as an array of [name, value] pairs.
      def to_a # Rather use collect? grep for to_a.collect
        pairs = []
        each { |name, value| pairs.push([name, value]) }
        pairs
      end

      # Renders the attributes as "{ name => value, ... }".
      def to_s
        rendered = []
        each { |name, value| rendered.push("#{name} => #{value}") }
        "{ #{rendered.join(', ')} }"
      end
    end
  end
end
869
+ # This adds a nice scrub method to Hpricot, so we don't need a _HTMLSanitizer class
870
+ # http://underpantsgnome.com/2007/01/20/hpricot-scrub
871
+ # I have modified it to check for attributes that are only allowed if they are in a certain tag
872
module Hpricot
  # HTML element names that are allowed to remain in sanitized markup.
  Acceptable_Elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b',
    'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite',
    'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt',
    'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map',
    'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp',
    'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
    'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u',
    'ul', 'var'
  ]

  # Attribute names kept by Elem#strip_attributes when no explicit list is given.
  Acceptable_Attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
    'action', 'align', 'alt', 'axis', 'border', 'cellpadding',
    'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class',
    'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime',
    'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height',
    'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang',
    'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name',
    'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev',
    'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
    'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
    'type', 'usemap', 'valign', 'value', 'vspace', 'width', 'xml:lang'
  ]

  Unacceptable_Elements_With_End_Tag = ['script', 'applet']

  Acceptable_Css_Properties = ['azimuth', 'background-color',
    'border-bottom-color', 'border-collapse', 'border-color',
    'border-left-color', 'border-right-color', 'border-top-color', 'clear',
    'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
    'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
    'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
    'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
    'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
    'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
    'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
    'white-space', 'width'
  ]

  # survey of common keywords found in feeds
  Acceptable_Css_Keywords = ['auto', 'aqua', 'black', 'block', 'blue',
    'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
    'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
    'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
    'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
    'transparent', 'underline', 'white', 'yellow'
  ]

  Mathml_Elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
    'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
    'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
    'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
    'munderover', 'none'
  ]

  Mathml_Attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
    'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
    'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
    'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
    'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
    'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
    'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
    'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
    'xlink:type', 'xmlns', 'xmlns:xlink'
  ]

  # svgtiny - foreignObject + linearGradient + radialGradient + stop
  Svg_Elements = ['a', 'animate', 'animateColor', 'animateMotion',
    'animateTransform', 'circle', 'defs', 'desc', 'ellipse', 'font-face',
    'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern', 'image',
    'linearGradient', 'line', 'metadata', 'missing-glyph', 'mpath', 'path',
    'polygon', 'polyline', 'radialGradient', 'rect', 'set', 'stop', 'svg',
    'switch', 'text', 'title', 'use'
  ]

  # svgtiny + class + opacity + offset + xmlns + xmlns:xlink
  Svg_Attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
    'arabic-form', 'ascent', 'attributeName', 'attributeType',
    'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
    'class', 'color', 'color-rendering', 'content', 'cx', 'cy', 'd',
    'descent', 'display', 'dur', 'end', 'fill', 'fill-rule', 'font-family',
    'font-size', 'font-stretch', 'font-style', 'font-variant',
    'font-weight', 'from', 'fx', 'fy', 'g1', 'g2', 'glyph-name',
    'gradientUnits', 'hanging', 'height', 'horiz-adv-x', 'horiz-origin-x',
    'id', 'ideographic', 'k', 'keyPoints', 'keySplines', 'keyTimes',
    'lang', 'mathematical', 'max', 'min', 'name', 'offset', 'opacity',
    'origin', 'overline-position', 'overline-thickness', 'panose-1',
    'path', 'pathLength', 'points', 'preserveAspectRatio', 'r',
    'repeatCount', 'repeatDur', 'requiredExtensions', 'requiredFeatures',
    'restart', 'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv',
    'stop-color', 'stop-opacity', 'strikethrough-position',
    'strikethrough-thickness', 'stroke', 'stroke-dasharray',
    'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin',
    'stroke-miterlimit', 'stroke-width', 'systemLanguage', 'target',
    'text-anchor', 'to', 'transform', 'type', 'u1', 'u2',
    'underline-position', 'underline-thickness', 'unicode',
    'unicode-range', 'units-per-em', 'values', 'version', 'viewBox',
    'visibility', 'width', 'widths', 'x', 'x-height', 'x1', 'x2',
    'xlink:actuate', 'xlink:arcrole', 'xlink:href', 'xlink:role',
    'xlink:show', 'xlink:title', 'xlink:type', 'xml:base', 'xml:lang',
    'xml:space', 'xmlns', 'xmlns:xlink', 'y', 'y1', 'y2', 'zoomAndPan'
  ]

  Svg_Attr_Map = nil
  Svg_Elem_Map = nil

  Acceptable_Svg_Properties = [ 'fill', 'fill-opacity', 'fill-rule',
    'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
    'stroke-opacity'
  ]

  unless $compatible
    # Map each MathML / SVG element name to the attribute list allowed on it.
    # The lists live in the constants above; the previous code read class
    # variables (@@mathml_elements, ...) that were never defined and therefore
    # raised NameError as soon as this branch ran.
    @@acceptable_tag_specific_attributes = {}
    Mathml_Elements.each { |e| @@acceptable_tag_specific_attributes[e] = Mathml_Attributes }
    Svg_Elements.each { |e| @@acceptable_tag_specific_attributes[e] = Svg_Attributes }
  end

  # Collection-level helpers: each simply forwards to every member.
  class Elements
    def strip(allowed_tags=[]) # I completely route around this with the recursive_strip in Doc
      each { |x| x.strip(allowed_tags) }
    end

    def strip_attributes(safe=[])
      each { |x| x.strip_attributes(safe) }
    end

    # NOTE(review): no strip_style is defined on the node classes in this
    # module, so members must get it from elsewhere -- confirm before relying
    # on this method.
    def strip_style(ok_props=[], ok_keywords=[])
      each { |x| x.strip_style(ok_props, ok_keywords) }
    end
  end

  # Text, comment and bogus-end-tag nodes have nothing to strip; the methods
  # exist so Elements#strip / #strip_attributes can call them uniformly.
  class Text
    def strip(foo)
    end
    def strip_attributes(foo)
    end
  end
  class Comment
    def strip(foo)
    end
    def strip_attributes(foo)
    end
  end
  class BogusETag
    def strip(foo)
    end
    def strip_attributes(foo)
    end
  end

  class Elem
    # Ask every child node to decode its entities.
    def decode_entities
      children.each{ |x| x.decode_entities }
    end

    # Replace this element with the serialized form of its children.
    def cull
      swap(children.to_s) if children
    end

    # Cull the element when strip_removes? says so.
    #
    # +allowed_tags+ is accepted (and ignored) so that Elements#strip, which
    # passes it to every member, no longer raises ArgumentError here.
    def strip(allowed_tags=[])
      cull if strip_removes?
    end

    # Remove every attribute whose name is not whitelisted.
    #
    # +safe+ is the list of attribute names to keep; when it is nil or empty
    # (which is what Elements#strip_attributes passes by default) the
    # module-wide Acceptable_Attributes list is used, matching the previous
    # zero-argument behavior.
    def strip_attributes(safe=[])
      return if attributes.nil?
      allowed = (safe.nil? || safe.empty?) ? Acceptable_Attributes : safe
      # Collect first, remove afterwards: removing while iterating over the
      # attribute collection can skip entries.
      doomed = []
      attributes.each { |atr| doomed << atr[0] unless allowed.include?(atr[0]) }
      doomed.each { |name| remove_attribute(name) }
    end

    def strip_removes?
      # I'm sure there are others that should be ripped instead of stripped
      attributes && attributes['type'] =~ /script|css/
    end
  end
end
1056
+
1057
+ module FeedParser
1058
# Version string of this library; also embedded in USER_AGENT below.
Version = "0.1aleph_naught"

# License text of the original work.
License = "Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE."

Author = "Jeff Hodges <http://somethingsimilar.com>"
Copyright_Holder = "Mark Pilgrim <http://diveintomark.org/>"
Contributors = [ "Jason Diamond <http://injektilo.org/>",
  "John Beimler <http://john.beimler.org/>",
  "Fazal Majid <http://www.majid.info/mylos/weblog/>",
  "Aaron Swartz <http://aaronsw.com/>",
  "Kevin Marks <http://epeus.blogspot.com/>"
]

# HTTP "User-Agent" header to send to servers when downloading feeds.
# If you are embedding feedparser in a larger application, you should
# change this to your application name and URL.
# Formatted with the Version constant: the previous "% @version" read an
# instance variable that is nil at this scope, leaving the version blank.
USER_AGENT = "UniversalFeedParser/%s +http://feedparser.org/" % Version

# HTTP "Accept" header to send to servers when downloading feeds. If you don't
# want to send an Accept header, set this to nil.
ACCEPT_HEADER = "application/atom+xml,application/rdf+xml,application/rss+xml,application/x-netcdf,application/xml;q=0.9,text/xml;q=0.2,*/*;q=0.1"


# If you want feedparser to automatically run HTML markup through HTML Tidy, set
# this to true. Requires mxTidy <http://www.egenix.com/files/python/mxTidy.html>
# or utidylib <http://utidylib.berlios.de/>.
TIDY_MARKUP = false #FIXME untranslated

# List of Python interfaces for HTML Tidy, in order of preference. Only useful
# if TIDY_MARKUP = true
PREFERRED_TIDY_INTERFACES = ["uTidy", "mxTidy"] #FIXME untranslated
1109
+
1110
+ # The original Python import. I'm using it to help translate
1111
+ #import sgmllib, re, sys, copy, urlparse, time, rfc822, types, cgi, urllib, urllib2
1112
+
1113
+
1114
+
1115
+ # ---------- don't touch these ----------
1116
# Common ancestor of the three conditions declared right below it.
class ThingsNobodyCaresAboutButMe < Exception; end

class CharacterEncodingOverride < ThingsNobodyCaresAboutButMe; end

class CharacterEncodingUnknown < ThingsNobodyCaresAboutButMe; end

class NonXMLContentType < ThingsNobodyCaresAboutButMe; end

# Deliberately NOT a ThingsNobodyCaresAboutButMe: it derives from Exception directly.
class UndeclaredNamespace < Exception; end
1126
+
1127
+
1128
# Maps the short feed-version identifiers to human-readable names.
# The empty string stands for "unknown".
SUPPORTED_VERSIONS = {
  ''        => 'unknown',
  'rss090'  => 'RSS 0.90',
  'rss091n' => 'RSS 0.91 (Netscape)',
  'rss091u' => 'RSS 0.91 (Userland)',
  'rss092'  => 'RSS 0.92',
  'rss093'  => 'RSS 0.93',
  'rss094'  => 'RSS 0.94',
  'rss20'   => 'RSS 2.0',
  'rss10'   => 'RSS 1.0',
  'rss'     => 'RSS (unknown version)',
  'atom01'  => 'Atom 0.1',
  'atom02'  => 'Atom 0.2',
  'atom03'  => 'Atom 0.3',
  'atom10'  => 'Atom 1.0',
  'atom'    => 'Atom (unknown version)',
  'cdf'     => 'CDF',
  'hotrss'  => 'Hot RSS'
}
1146
# Hash subclass that lets one piece of feed data be reached under several names.
#
# The naming of a certain common attribute (such as, "When was the last
# time this feed was updated?") can differ depending on the type of feed
# being handled. This class allows both the name expected by someone who
# knows the feed format and one stable name that works for every format.
# @@keymap maps an alias to the key (or list of candidate keys) that really
# holds the data; #[] and #[]= consult it, and method_missing exposes keys
# as attribute-style readers and writers (dict.title, dict.title = "x").
class FeedParserDict < Hash
  @@keymap = {'channel' => 'feed',
    'items' => 'entries',
    'guid' => 'id',
    'date' => 'updated',
    'date_parsed' => 'updated_parsed',
    'description' => ['subtitle', 'summary'],
    'url' => ['href'],
    'modified' => 'updated',
    'modified_parsed' => 'updated_parsed',
    'issued' => 'published',
    'issued_parsed' => 'published_parsed',
    'copyright' => 'rights',
    'copyright_detail' => 'rights_detail',
    'tagline' => 'subtitle',
    'tagline_detail' => 'subtitle_detail'}

  # Hash already has an #entries method (from Enumerable); override it so
  # dict.entries returns the value stored under 'entries'.
  def entries
    self['entries']
  end

  # Build a dict, optionally seeded from +pairs+.
  #
  # pairs - nil, an Array of [key, value] pairs (inserted through #[]=, so
  #         aliases are translated), or a Hash / Hash subclass (merged as is).
  def initialize(pairs=nil)
    super()
    if pairs.is_a?(Array) && pairs[0].is_a?(Array) && pairs[0].length == 2
      pairs.each do |pair|
        k, v = pair
        self[k] = v
      end
    elsif pairs.is_a?(Hash) # is_a? so that another FeedParserDict is accepted too
      merge!(pairs)
    end
  end

  # Look up +key+, honouring the aliases in @@keymap.
  #
  # 'category' and 'categories' are derived from the 'tags' entry and yield
  # nil when there are no tags, like any other missing key.
  def [](key)
    if key == 'category'
      first_tag = (self['tags'] || [])[0]
      return first_tag && first_tag['term']
    end
    if key == 'categories'
      tags = self['tags']
      return tags && tags.collect { |tag| [tag['scheme'], tag['term']] }
    end
    realkey = @@keymap[key] || key
    if realkey.class == Array
      # A distinct block variable: reusing the name `key` here shadows the
      # method argument (and overwrites it on Ruby 1.8).
      realkey.each { |candidate| return self[candidate] if has_key?(candidate) }
    end
    # Note that the original key is preferred over the realkey we (might
    # have) found in @@keymap
    return super(key) if has_key?(key)
    super(realkey)
  end

  # Store +value+; an aliased +key+ is rewritten to its real key (the first
  # candidate when the alias maps to a list).
  def []=(key,value)
    if @@keymap.key?(key)
      key = @@keymap[key]
      key = key[0] if key.class == Array
    end
    super(key,value)
  end

  # Attribute-style access: `dict.foo = v` stores under 'foo', `dict.foo`
  # reads it. Names ending in '!' or '?' or starting with '_' are rejected
  # with NoMethodError.
  def method_missing(msym, *args)
    methodname = msym.to_s
    # Slice with a length so the result is a one-character String on every
    # Ruby version (a bare [-1] yields an Integer on 1.8).
    last_char = methodname[-1, 1]
    if last_char == '='
      return self[methodname[0..-2]] = args[0]
    elsif last_char != '!' && last_char != '?' && methodname[0, 1] != "_" # FIXME implement with private
      return self[methodname]
    else
      raise NoMethodError, "whoops, we don't know about the attribute or method called `#{methodname}' for #{self}:#{self.class}"
    end
  end
end
1234
+
1235
+
1236
+
1237
+
1238
+ module FeedParserMixin
1239
+ attr_accessor :feeddata, :version, :namespacesInUse, :date_handlers
1240
+
1241
# Reset every piece of parser state before a feed is processed.
#
# baseuri  - base URI for the document ('' when nil).
# baselang - default language; when given it is also stored (with '_'
#            replaced by '-') as the feed's 'language'.
# encoding - character encoding recorded in @encoding.
def startup(baseuri=nil, baselang=nil, encoding='utf-8')
  $stderr << "initializing FeedParser\n" if $debug

  # Known namespace URIs and the canonical prefix each one maps to.
  @namespaces = {
    '' => '',
    'http://backend.userland.com/rss' => '',
    'http://blogs.law.harvard.edu/tech/rss' => '',
    'http://purl.org/rss/1.0/' => '',
    'http://my.netscape.com/rdf/simple/0.9/' => '',
    'http://example.com/newformat#' => '',
    'http://example.com/necho' => '',
    'http://purl.org/echo/' => '',
    'uri/of/echo/namespace#' => '',
    'http://purl.org/pie/' => '',
    'http://purl.org/atom/ns#' => '',
    'http://www.w3.org/2005/Atom' => '',
    'http://purl.org/rss/1.0/modules/rss091#' => '',
    'http://webns.net/mvcb/' => 'admin',
    'http://purl.org/rss/1.0/modules/aggregation/' => 'ag',
    'http://purl.org/rss/1.0/modules/annotate/' => 'annotate',
    'http://media.tangent.org/rss/1.0/' => 'audio',
    'http://backend.userland.com/blogChannelModule' => 'blogChannel',
    'http://web.resource.org/cc/' => 'cc',
    'http://backend.userland.com/creativeCommonsRssModule' => 'creativeCommons',
    'http://purl.org/rss/1.0/modules/company' => 'co',
    'http://purl.org/rss/1.0/modules/content/' => 'content',
    'http://my.theinfo.org/changed/1.0/rss/' => 'cp',
    'http://purl.org/dc/elements/1.1/' => 'dc',
    'http://purl.org/dc/terms/' => 'dcterms',
    'http://purl.org/rss/1.0/modules/email/' => 'email',
    'http://purl.org/rss/1.0/modules/event/' => 'ev',
    'http://rssnamespace.org/feedburner/ext/1.0' => 'feedburner',
    'http://freshmeat.net/rss/fm/' => 'fm',
    'http://xmlns.com/foaf/0.1/' => 'foaf',
    'http://www.w3.org/2003/01/geo/wgs84_pos#' => 'geo',
    'http://postneo.com/icbm/' => 'icbm',
    'http://purl.org/rss/1.0/modules/image/' => 'image',
    'http://www.itunes.com/DTDs/PodCast-1.0.dtd' => 'itunes',
    'http://example.com/DTDs/PodCast-1.0.dtd' => 'itunes',
    'http://purl.org/rss/1.0/modules/link/' => 'l',
    'http://search.yahoo.com/mrss' => 'media',
    'http://madskills.com/public/xml/rss/module/pingback/' => 'pingback',
    'http://prismstandard.org/namespaces/1.2/basic/' => 'prism',
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => 'rdf',
    'http://www.w3.org/2000/01/rdf-schema#' => 'rdfs',
    'http://purl.org/rss/1.0/modules/reference/' => 'ref',
    'http://purl.org/rss/1.0/modules/richequiv/' => 'reqv',
    'http://purl.org/rss/1.0/modules/search/' => 'search',
    'http://purl.org/rss/1.0/modules/slash/' => 'slash',
    'http://schemas.xmlsoap.org/soap/envelope/' => 'soap',
    'http://purl.org/rss/1.0/modules/servicestatus/' => 'ss',
    'http://hacks.benhammersley.com/rss/streaming/' => 'str',
    'http://purl.org/rss/1.0/modules/subscription/' => 'sub',
    'http://purl.org/rss/1.0/modules/syndication/' => 'sy',
    'http://purl.org/rss/1.0/modules/taxonomy/' => 'taxo',
    'http://purl.org/rss/1.0/modules/threading/' => 'thr',
    'http://purl.org/rss/1.0/modules/textinput/' => 'ti',
    'http://madskills.com/public/xml/rss/module/trackback/' => 'trackback',
    'http://wellformedweb.org/commentAPI/' => 'wfw',
    'http://purl.org/rss/1.0/modules/wiki/' => 'wiki',
    'http://www.w3.org/1999/xhtml' => 'xhtml',
    'http://www.w3.org/XML/1998/namespace' => 'xml',
    'http://www.w3.org/1999/xlink' => 'xlink',
    'http://schemas.pocketsoap.com/rss/myDescModule/' => 'szf'
  }

  # Same table keyed by lower-cased URI, used for case-insensitive matching.
  @matchnamespaces = {}
  @namespaces.each do |uri, prefix|
    @matchnamespaces[uri.downcase] = prefix
  end

  @can_be_relative_uri = ['link', 'id', 'wfw_comment', 'wfw_commentrss', 'docs', 'url', 'href', 'comments', 'license', 'icon', 'logo']
  @can_contain_relative_uris = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']
  @can_contain_dangerous_markup = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']
  @html_types = ['text/html', 'application/xhtml+xml']

  @feeddata = FeedParserDict.new # feed-level data
  @encoding = encoding           # character encoding
  @entries = []                  # list of entry-level data
  @version = ''                  # feed type/version, see SUPPORTED_VERSIONS
  @namespacesInUse = {}          # hash of namespaces defined by the feed

  # the following are used internally to track state;
  # this is really out of control and should be refactored
  @infeed = false
  @inentry = false
  @incontent = 0 # Yes, this needs to be zero until I work out popContent and pushContent
  @intextinput = false
  @inimage = false
  @inauthor = false
  @incontributor = false
  @inpublisher = false
  @insource = false
  @sourcedata = FeedParserDict.new
  @contentparams = FeedParserDict.new
  @summaryKey = nil
  @namespacemap = {}
  @elementstack = []
  @basestack = []
  @langstack = []

  @baseuri = baseuri || ''
  @lang = baselang ? baselang : nil
  @feeddata['language'] = baselang.gsub('_', '-') if baselang

  # Date parsers, listed in the order given here.
  @date_handlers = [
    :_parse_date_rfc822,
    :_parse_date_hungarian,
    :_parse_date_greek,
    :_parse_date_mssql,
    :_parse_date_nate,
    :_parse_date_onblog,
    :_parse_date_w3dtf,
    :_parse_date_iso8601
  ]
  $stderr << "Leaving startup\n" if $debug # My addition
end
1348
+
1349
# Handle a start tag that has no dedicated parser callback.
#
# tag    - element name, possibly namespace-prefixed ("dc:creator").
# attrsd - attributes, either a Hash or an Array of [name, value] pairs
#          (the SGML-based loose parser sends the latter).
#
# Normalizes the attributes, tracks xml:base / xml:lang and namespace
# declarations, passes inline XHTML straight through as data, and otherwise
# dispatches to a "_start_<prefix><name>" handler, falling back to push.
def unknown_starttag(tag, attrsd)
  $stderr << "start #{tag} with #{attrsd}\n" if $debug
  # normalize attrs
  attrsD = {}
  attrsd = Hash[*attrsd.flatten] if attrsd.class == Array # Magic! Asterisk!
  # LooseFeedParser needs the above because SGMLParser sends attrs as a
  # list of lists (like [['type','text/html'],['mode','escaped']])

  attrsd.each do |old_k, value|
    k = old_k.downcase # Downcase all keys
    # Downcase the value when the *key* is 'rel' or 'type'. A copy is made so
    # the caller's string is never mutated.
    if ['rel', 'type'].include?(k) and value.respond_to?(:downcase)
      value = value.downcase
    end
    attrsD[k] = value
  end

  # track xml:base and xml:lang
  baseuri = attrsD['xml:base'] || attrsD['base'] || @baseuri
  @baseuri = urljoin(@baseuri, baseuri)
  lang = attrsD['xml:lang'] || attrsD['lang']
  if lang == ''
    # xml:lang could be explicitly set to '', we need to capture that
    lang = nil
  elsif lang.nil?
    # if no xml:lang is specified, use parent lang
    lang = @lang
  end
  if lang and not lang.empty?
    if ['feed', 'rss', 'rdf:RDF'].include?(tag)
      @feeddata['language'] = lang.gsub('_', '-')
    end
  end
  @lang = lang
  @basestack << @baseuri
  @langstack << lang

  # track namespaces
  attrsd.each do |name, uri|
    if /^xmlns:/ =~ name # name begins with xmlns:
      trackNamespace(name[6..-1], uri)
    elsif name == 'xmlns'
      trackNamespace(nil, uri)
    end
  end

  # track inline content
  if @incontent != 0 and @contentparams.has_key?('type') and not ( /xml$/ =~ (@contentparams['type'] || 'xml') )
    # element declared itself as escaped markup, but isn't really
    @contentparams['type'] = 'application/xhtml+xml'
  end
  if @incontent != 0 and @contentparams['type'] == 'application/xhtml+xml'
    # Note: probably shouldn't simply recreate localname here, but
    # our namespace handling isn't actually 100% correct in cases where
    # the feed redefines the default namespace (which is actually
    # the usual case for inline content, thanks Sam), so here we
    # cheat and just reconstruct the element based on localname
    # because that compensates for the bugs in our namespace handling.
    # This will horribly munge inline content with non-empty qnames,
    # but nobody actually does that, so I'm not fixing it.
    tag = tag.split(':')[-1]
    attrsA = attrsd.to_a.collect{|l| "#{l[0]}=\"#{l[1]}\""}
    attrsS = ' '+attrsA.join(' ')
    return handle_data("<#{tag}#{attrsS}>", false)
  end

  # match namespaces
  if /:/ =~ tag
    prefix, suffix = tag.split(':', 2)
  else
    prefix, suffix = '', tag
  end
  prefix = @namespacemap[prefix] || prefix
  if prefix and not prefix.empty?
    prefix = prefix + '_'
  end

  # special hack for better tracking of empty textinput/image elements in illformed feeds
  # (the textinput test used to read "not prefix and not prefix.empty?",
  # which can never be true, so @intextinput was never reset)
  if (prefix.nil? or prefix.empty?) and not (['title', 'link', 'description', 'name'].include?(tag))
    @intextinput = false
  end
  if (prefix.nil? or prefix.empty?) and not (['title', 'link', 'description', 'url', 'href', 'width', 'height'].include?(tag))
    @inimage = false
  end

  # call special handler (if defined) or default handler
  # NOTE(review): the rescue also hides a NoMethodError raised *inside* a
  # handler; kept as is because callers may rely on that leniency.
  begin
    return send('_start_'+prefix+suffix, attrsD)
  rescue NoMethodError
    return push(prefix + suffix, true)
  end
end # End unknown_starttag
1442
+
1443
# Handle an end tag that has no dedicated parser callback.
#
# Dispatches to a "_end_<prefix><name>" handler (falling back to pop),
# echoes the closing tag into inline XHTML content, and restores the
# xml:base / xml:lang values of the enclosing element.
def unknown_endtag(tag)
  $stderr << "end #{tag}\n" if $debug
  # match namespaces
  if tag.index(':')
    prefix, suffix = tag.split(':',2)
  else
    prefix, suffix = '', tag
  end
  prefix = @namespacemap[prefix] || prefix
  if prefix and not prefix.empty?
    prefix = prefix + '_'
  end

  # call special handler (if defined) or default handler
  begin
    send('_end_' + prefix + suffix) # NOTE no return here! do not add it!
  rescue NoMethodError
    pop(prefix + suffix)
  end

  # track inline content
  # The negation mirrors unknown_starttag: only content whose declared type
  # is NOT an XML type gets reclassified. Without it, XML types were
  # overwritten and escaped-markup types were left alone.
  if @incontent != 0 and @contentparams.has_key?('type') and not ( /xml$/ =~ (@contentparams['type'] || 'xml') )
    # element declared itself as escaped markup, but it isn't really
    @contentparams['type'] = 'application/xhtml+xml'
  end
  if @incontent != 0 and @contentparams['type'] == 'application/xhtml+xml'
    tag = tag.split(':')[-1]
    handle_data("</#{tag}>", false)
  end

  # track xml:base and xml:lang going out of scope
  if @basestack and not @basestack.empty?
    @basestack.pop
    if @basestack and @basestack[-1] and not (@basestack.empty? or @basestack[-1].empty?)
      @baseuri = @basestack[-1]
    end
  end
  if @langstack and not @langstack.empty?
    @langstack.pop
    if @langstack and not @langstack.empty? # and @langstack[-1] and not @langstack.empty?
      @lang = @langstack[-1]
    end
  end
end
1487
+
1488
# LooseParserOnly
# Called for each character reference, e.g. for '&#160;', ref will be '160'.
#
# References to the five XML-special characters (", &, ', <, >) are kept as
# references; any other reference is converted to the character itself
# (via unichr / uconvert). The text is appended to the innermost open
# element. Does nothing when no element is open.
def handle_charref(ref)
  $stderr << "entering handle_charref with #{ref}\n" if $debug
  return if @elementstack.nil? or @elementstack.empty?
  # Work on a lower-cased copy: downcase! would alter the caller's string
  # and raises on frozen strings.
  ref = ref.downcase
  chars = ['34', '38', '39', '60', '62', 'x22', 'x26', 'x27', 'x3c', 'x3e']
  if chars.include?(ref)
    text = "&##{ref};"
  else
    if ref[0..0] == 'x'
      c = (ref[1..-1]).to_i(16)
    else
      c = ref.to_i
    end
    text = uconvert(unichr(c),'unicode')
  end
  @elementstack[-1][2] << text
end
1507
+
1508
# LooseParserOnly
# Called for each entity reference, e.g. for '&copy;', ref will be 'copy'.
#
# The five XML entities (lt, gt, quot, amp, apos) are appended unchanged as
# references; anything else is run through HTMLEntities::decode_entities.
# Does nothing when no element is open.
def handle_entityref(ref)
  return if @elementstack.nil? or @elementstack.empty?
  $stderr << "entering handle_entityref with #{ref}\n" if $debug

  reference = "&#{ref};"
  keep_as_reference = ['lt', 'gt', 'quot', 'amp', 'apos'].include?(ref)
  piece = keep_as_reference ? reference : HTMLEntities::decode_entities(reference)
  @elementstack[-1][2] << piece
end
1522
+
1523
# Called for each block of plain text, i.e. outside of any tag and not
# containing any character or entity references.
#
# Appends +text+ to the innermost open element. When +escape+ is true and the
# current content type is 'application/xhtml+xml', the text is passed through
# String#to_xs first. Does nothing when no element is open.
def handle_data(text, escape=true)
  return if @elementstack.nil? or @elementstack.empty?

  must_escape = escape && @contentparams['type'] == 'application/xhtml+xml'
  text = text.to_xs if must_escape
  @elementstack[-1][2] << text
end
1532
+
1533
# Called for each comment, e.g. <!-- insert message here -->.
# Comments are ignored; returns nil.
def handle_comment(comment)
  nil
end
1536
+
1537
# Processing instructions are ignored; returns nil.
def handle_pi(text)
  nil
end
1539
+
1540
# Declarations are ignored; returns nil.
def handle_decl(text)
  nil
end
1542
+
1543
# for LooseFeedParser
# Handle the "<!" construct starting at offset +i+ of @rawdata.
#
# A CDATA section has its body (escaped with String#to_xs) passed to
# handle_data with escaping turned off; an unterminated section runs to the
# end of the data. Any other declaration is skipped up to its '>'.
# Returns the offset at which parsing should resume.
def parse_declaration(i)
  $stderr << "entering parse_declaration\n" if $debug

  unless @rawdata[i...i+9] == '<![CDATA['
    # Ordinary declaration: resume right after the next '>'
    # (index 0 is assumed when there is none).
    return @rawdata.index(/>/, i).to_i + 1
  end

  body_start = i + 9
  body_end = @rawdata.index(/\]\]>/u, body_start) || @rawdata.length
  handle_data(@rawdata[body_start...body_end].to_xs, false)
  body_end + 3
end
1556
+
1557
# Translate the shorthand content types 'text', 'html' and 'xhtml' into full
# MIME types; any other value is returned lower-cased.
#
# contentType - String; it is never modified (the previous downcase! altered
#               the caller's object and raised on frozen strings).
#
# Returns a String.
def mapContentType(contentType)
  normalized = contentType.downcase
  case normalized
  when 'text'
    'text/plain'
  when 'html'
    'text/html'
  when 'xhtml'
    'application/xhtml+xml'
  else
    normalized
  end
end
1569
+
1570
# Record a namespace declaration seen in the feed.
#
# prefix - declared prefix, or nil for the default namespace.
# uri    - namespace URI as written in the feed.
#
# Certain URIs also fix the feed version when none is set yet. URIs found in
# @matchnamespaces are mapped to their canonical prefix; unknown ones are
# recorded in @namespacesInUse under the declared prefix.
def trackNamespace(prefix, uri)
  loweruri = uri.downcase.strip
  version_unset = (@version.nil? or @version.empty?)

  if version_unset and [prefix, loweruri] == [nil, 'http://my.netscape.com/rdf/simple/0.9/']
    @version = 'rss090'
  elsif version_unset and loweruri == 'http://purl.org/rss/1.0/'
    @version = 'rss10'
  elsif version_unset and loweruri == 'http://www.w3.org/2005/atom'
    @version = 'atom10'
  elsif loweruri =~ /backend\.userland\.com\/rss/
    # match any backend.userland.com namespace
    uri = 'http://backend.userland.com/rss'
    loweruri = uri
  end

  unless @matchnamespaces.has_key?(loweruri)
    @namespacesInUse[prefix || ''] = uri
    return
  end
  canonical = @matchnamespaces[loweruri]
  @namespacemap[prefix] = canonical
  @namespacesInUse[canonical] = uri
end
1591
+
1592
# Resolve +uri+ against the current base URI ('' when none is set) with urljoin.
def resolveURI(uri)
  base = @baseuri || ''
  urljoin(base, uri)
end
1595
+
1596
# Entity-decoding hook: this implementation hands +data+ back unchanged,
# whatever +element+ is.
def decodeEntities(element, data)
  data
end
1599
+
1600
# Open +element+: add an [element, expectingText, pieces] frame (with an
# empty pieces list) to the top of @elementstack. Returns the stack.
def push(element, expectingText)
  frame = [element, expectingText, []]
  @elementstack.push(frame)
end
1603
+
1604
# Close +element+ and file the text collected for it.
#
# Returns nil when the stack is empty or +element+ is not the innermost open
# element. Otherwise the collected pieces are joined (and stripped unless
# +stripWhitespace+ is false). If the element was pushed as not expecting
# text, that string is returned right away. Otherwise it is, in order:
# base64-decoded, resolved as a URI, entity-decoded, scanned for relative
# URIs, sanitized, converted to utf-8, and finally stored on the current
# entry or feed context. The processed string is returned.
def pop(element, stripWhitespace=true)
  return if @elementstack.nil? or @elementstack.empty?
  return if @elementstack[-1][0] != element

  element, expectingText, pieces = @elementstack.pop
  result = pieces.class == Array ? pieces.join('') : pieces
  result.strip! if stripWhitespace
  return result unless expectingText

  # decode base64 content
  if @contentparams['base64']
    decoded = Base64::decode64(result) # a.k.a. [result].unpack('m')[0]
    result = decoded unless result.empty? or decoded.empty?
  end

  # resolve relative URIs
  if @can_be_relative_uri.include?(element) and result and not result.empty?
    result = resolveURI(result)
  end

  # decode entities within embedded markup
  result = decodeEntities(element, result) unless @contentparams['base64']

  # remove temporary cruft from contentparams
  @contentparams.delete('mode')
  @contentparams.delete('base64')

  # resolve relative URIs within embedded markup
  if @html_types.include?(mapContentType(@contentparams['type'] || 'text/html')) and
     @can_contain_relative_uris.include?(element)
    result = FeedParser.resolveRelativeURIs(result, @baseuri, @encoding)
  end

  # sanitize embedded markup
  if @html_types.include?(mapContentType(@contentparams['type'] || 'text/html')) and
     @can_contain_dangerous_markup.include?(element)
    result = FeedParser.sanitizeHTML(result, @encoding)
  end

  if @encoding and not @encoding.empty? and @encoding != 'utf-8'
    result = uconvert(result, @encoding, 'utf-8')
    # FIXME I turn everything into utf-8, not unicode, originally because REXML was being used but now because I haven't tested it out yet.
  end

  # categories/tags/keywords/whatever are handled in _end_category
  return result if element == 'category'

  # store output in appropriate place(s)
  if @inentry and not @insource
    entry = @entries[-1]
    case element
    when 'content'
      entry[element] ||= []
      detail = Marshal.load(Marshal.dump(@contentparams)) # deepcopy
      detail['value'] = result
      entry[element] << detail
    when 'link'
      entry[element] = result
      entry['links'][-1]['href'] = result if result and not result.empty?
    else
      element = 'summary' if element == 'description'
      entry[element] = result
      if @incontent != 0
        detail = Marshal.load(Marshal.dump(@contentparams))
        detail['value'] = result
        entry[element + '_detail'] = detail
      end
    end
  elsif (@infeed or @insource) and not @intextinput and not @inimage
    context = getContext()
    element = 'subtitle' if element == 'description'
    context[element] = result
    if element == 'link'
      context['links'][-1]['href'] = result
    elsif @incontent != 0
      detail = Marshal.load(Marshal.dump(@contentparams))
      detail['value'] = result
      context[element + '_detail'] = detail
    end
  end
  result
end
1696
+
1697
# Opens a content-bearing element: bumps the content nesting counter,
# rebuilds @contentparams (type, language, base, base64) for it and pushes
# +tag+ onto the element stack.
def pushContent(tag, attrsD, defaultContentType, expectingText)
  @incontent += 1 # Yes, I hate this.
  content_type = mapContentType(attrsD['type'] || defaultContentType)
  @contentparams = FeedParserDict.new(
    {'type' => content_type, 'language' => @lang, 'base' => @baseuri}
  )
  @contentparams['base64'] = isBase64(attrsD, @contentparams)
  push(tag, expectingText)
end
1704
+
1705
# Closes a content-bearing element: pops +tag+, decrements the content
# nesting counter, empties @contentparams and returns the popped text.
def popContent(tag)
  text = pop(tag)
  @incontent -= 1
  @contentparams.clear
  text
end
1711
+
1712
# Rewrites the namespace prefix of a qualified name through @namespacemap.
#
# name - a String such as 'dc:creator'; names without a colon are returned
#        unchanged.
#
# Returns the name with its prefix (the text before the first colon)
# replaced by the mapped prefix when @namespacemap has one.
def mapToStandardPrefix(name)
  colonpos = name.index(':')
  return name unless colonpos
  # name[0, colonpos] yields '' for a leading colon; the range form
  # name[0..colonpos-1] would become name[0..-1], i.e. the whole string.
  prefix = name[0, colonpos]
  suffix = name[colonpos + 1..-1]
  prefix = @namespacemap[prefix] || prefix
  return prefix + ':' + suffix
end
1722
+
1723
# Looks up +name+ in +attrsD+ after normalising its namespace prefix with
# mapToStandardPrefix.
def getAttribute(attrsD, name)
  canonical = mapToStandardPrefix(name)
  attrsD[canonical]
end
1726
+
1727
# Decides whether element content should be treated as base64.
#
# True when the element declares mode="base64"; otherwise false for text/*,
# */xml and *+xml content types and true for every other type.
def isBase64(attrsD, contentparams)
  return true if attrsD['mode'] == 'base64'
  textual = /(^text\/)|(\+xml$)|(\/xml$)/ =~ contentparams['type']
  textual ? false : true
end
1734
+
1735
# Normalises the 'url' / 'uri' / 'href' attributes to a single 'href' key.
# The hash is modified in place and also returned.
def itsAnHrefDamnIt(attrsD)
  target = attrsD['url'] || attrsD['uri'] || attrsD['href']
  return attrsD unless target
  %w[url uri].each { |legacy| attrsD.delete(legacy) }
  attrsD['href'] = target
  attrsD
end
1744
+
1745
+
1746
# Stores +value+ under +key+ in the current context unless the context
# already holds a truthy value for that key.
def _save(key, value)
  target = getContext()
  target[key] || (target[key] = value)
end
1750
+
1751
# Handles <rss>: derives @version from the 'version' attribute unless a
# version has already been recorded.
def _start_rss(attrsD)
  return unless !@version || @version.empty?

  declared = attrsD['version'] || ''
  known = {
    '0.91' => 'rss091u',
    '0.92' => 'rss092',
    '0.93' => 'rss093',
    '0.94' => 'rss094'
  }[declared]

  @version =
    if known && !known.empty?
      known
    elsif /^2\./ =~ declared
      'rss20'
    else
      'rss'
    end
end
1770
+
1771
# Handles <dlhottitles>: unconditionally marks the feed version as 'hotrss'.
def _start_dlhottitles(attrsD)
  @version = 'hotrss'
end
1774
+
1775
# Handles <channel> (and <feedinfo>): flags that feed-level data is being
# parsed, then applies the shared lastmod/href attribute handling.
def _start_channel(attrsD)
  @infeed = true
  _cdf_common(attrsD)
end
alias :_start_feedinfo :_start_channel
1780
+
1781
# Turns the 'lastmod' and 'href' attributes into synthetic <modified> and
# <link> elements: each one is opened, the attribute value is placed in the
# top stack entry's text slot, and the element is closed again.
def _cdf_common(attrsD)
  if attrsD.has_key?('lastmod')
    _start_modified({})
    @elementstack[-1][-1] = attrsD['lastmod']
    _end_modified
  end

  if attrsD.has_key?('href')
    _start_link({})
    @elementstack[-1][-1] = attrsD['href']
    _end_link
  end
end
1793
+
1794
# Handles <feed>: flags that feed-level data is being parsed and derives
# @version from the 'version' attribute unless a version is already known.
#
# Known Atom versions map to 'atom01' / 'atom02' / 'atom03'; anything else
# becomes plain 'atom'.
def _start_feed(attrsD)
  @infeed = true
  versionmap = {'0.1' => 'atom01',
                '0.2' => 'atom02',
                '0.3' => 'atom03'
  }

  if not @version or @version.empty?
    attr_version = attrsD['version']
    version = versionmap[attr_version]
    # Test the looked-up version; testing @version here is always false
    # inside this branch and forces every feed to plain 'atom'.
    if version and not version.empty?
      @version = version
    else
      @version = 'atom'
    end
  end
end
1811
+
1812
# Handles </channel> (and </feed>): feed-level data is no longer being parsed.
def _end_channel
  @infeed = false
end
alias :_end_feed :_end_channel
1816
+
1817
# Handles <image>: sets the in-image flag, pushes the element and makes sure
# the current context has an 'image' dictionary.
def _start_image(attrsD)
  @inimage = true
  push('image', false)
  getContext()['image'] ||= FeedParserDict.new
end

# Handles </image>: pops the element and clears the in-image flag.
def _end_image
  pop('image')
  @inimage = false
end
1828
+
1829
# Handles <textinput>/<textInput>: sets the in-textinput flag, pushes the
# element and makes sure the current context has a 'textinput' dictionary.
def _start_textinput(attrsD)
  @intextinput = true
  push('textinput', false)
  getContext()['textinput'] ||= FeedParserDict.new
end
alias :_start_textInput :_start_textinput

# Handles </textinput>/</textInput>: pops the element and clears the flag.
def _end_textinput
  pop('textinput')
  @intextinput = false
end
alias :_end_textInput :_end_textinput
1842
+
1843
# Handles the opening of author-like elements: sets the in-author flag and
# pushes 'author' expecting text.
def _start_author(attrsD)
  @inauthor = true
  push('author', true)
end
alias :_start_managingeditor :_start_author
alias :_start_dc_author :_start_author
alias :_start_dc_creator :_start_author
alias :_start_itunes_author :_start_author

# Handles the closing of author-like elements: pops 'author', clears the
# flag and re-synchronises the author string with its detail dictionary.
def _end_author
  pop('author')
  @inauthor = false
  _sync_author_detail()
end
alias :_end_managingeditor :_end_author
alias :_end_dc_author :_end_author
alias :_end_dc_creator :_end_author
alias :_end_itunes_author :_end_author
1861
+
1862
# Handles <itunes:owner>: sets the in-publisher flag and pushes 'publisher'
# without expecting text.
def _start_itunes_owner(attrsD)
  @inpublisher = true
  push('publisher', false)
end

# Handles </itunes:owner>: pops 'publisher', clears the flag and
# re-synchronises the publisher string with its detail dictionary.
def _end_itunes_owner
  pop('publisher')
  @inpublisher = false
  _sync_author_detail('publisher')
end
1872
+
1873
# Handles <contributor>: sets the in-contributor flag, appends a fresh
# dictionary to the context's 'contributors' list and pushes the element.
def _start_contributor(attrsD)
  @incontributor = true
  people = (getContext()['contributors'] ||= [])
  people << FeedParserDict.new
  push('contributor', false)
end

# Handles </contributor>: pops the element and clears the flag.
def _end_contributor
  pop('contributor')
  @incontributor = false
end
1885
+
1886
# Handles <dc:contributor>: like _start_contributor, but the element text is
# collected under 'name'.
def _start_dc_contributor(attrsD)
  @incontributor = true
  people = (getContext()['contributors'] ||= [])
  people << FeedParserDict.new
  push('name', false)
end

# Handles </dc:contributor>: finishes the pending 'name' and clears the flag.
def _end_dc_contributor
  _end_name
  @incontributor = false
end
1898
+
1899
# Handles <name> / <itunes:name>: pushes 'name' without expecting text.
def _start_name(attrsD)
  push('name', false)
end
alias :_start_itunes_name :_start_name
1903
+
1904
# Handles </name> / </itunes:name>: routes the popped text to whichever
# structure is currently open, checked in this order: publisher, author,
# contributor, textinput.
def _end_name
  name = pop('name')
  if @inpublisher then _save_author('name', name, 'publisher')
  elsif @inauthor then _save_author('name', name)
  elsif @incontributor then _save_contributor('name', name)
  elsif @intextinput then getContext()['textinput']['name'] = name
  end
end
alias :_end_itunes_name :_end_name
1918
+
1919
# Handles <width>: pushes the element without expecting text.
def _start_width(attrsD)
  push('width', false)
end

# Handles </width>: converts the popped text to an integer and, while inside
# an image, stores it as the image width.
def _end_width
  width = pop('width').to_i
  getContext()['image']['width'] = width if @inimage
end
1930
+
1931
# Handles <height>: pushes the element without expecting text.
def _start_height(attrsD)
  push('height', false)
end

# Handles </height>: converts the popped text to an integer and, while inside
# an image, stores it as the image height.
def _end_height
  height = pop('height').to_i
  getContext()['image']['height'] = height if @inimage
end
1942
+
1943
# Handles <url> / <homepage> / <uri>: pushes 'href' expecting text.
def _start_url(attrsD)
  push('href', true)
end
alias :_start_homepage :_start_url
alias :_start_uri :_start_url
1948
+
1949
# Handles </url> / </homepage> / </uri>: routes the popped href to whichever
# structure is currently open, checked in this order: author, contributor,
# image, textinput (stored there under 'link').
def _end_url
  href = pop('href')
  if @inauthor then _save_author('href', href)
  elsif @incontributor then _save_contributor('href', href)
  elsif @inimage then getContext()['image']['href'] = href
  elsif @intextinput then getContext()['textinput']['link'] = href
  end
end
alias :_end_homepage :_end_url
alias :_end_uri :_end_url
1965
+
1966
# Handles <email> / <itunes:email>: pushes 'email' without expecting text.
def _start_email(attrsD)
  push('email', false)
end
alias :_start_itunes_email :_start_email
1970
+
1971
# Handles </email> / </itunes:email>: routes the popped address to the open
# publisher, author or contributor (checked in that order).
def _end_email
  address = pop('email')
  if @inpublisher then _save_author('email', address, 'publisher')
  elsif @inauthor then _save_author('email', address)
  elsif @incontributor then _save_contributor('email', address)
  end
end
alias :_end_itunes_email :_end_email
1982
+
1983
# Returns the dictionary that parsed values currently belong to: the source
# data while inside <source>, else the latest entry while inside an entry,
# else the feed-level data.
def getContext
  return @sourcedata if @insource
  return @entries[-1] if @inentry
  @feeddata
end
1993
+
1994
# Stores +value+ under +key+ in the context's "<prefix>_detail" dictionary
# (created on demand), then re-synchronises the author string.
def _save_author(key, value, prefix='author')
  detail_key = prefix + '_detail'
  context = getContext()
  context[detail_key] ||= FeedParserDict.new
  context[detail_key][key] = value
  _sync_author_detail()
end
2000
+
2001
# Stores +value+ under +key+ in the most recent contributor dictionary,
# creating the 'contributors' list with one empty entry when it is missing.
def _save_contributor(key, value)
  people = (getContext()['contributors'] ||= [FeedParserDict.new])
  people[-1][key] = value
end
2006
+
2007
# Keeps context[key] (a display string) and context["<key>_detail"] (a
# dictionary with 'name' and 'email') consistent with each other.
#
# When a non-empty detail dictionary exists, the display string is rebuilt
# from it: "name (email)", or whichever of the two is present. Otherwise the
# display string is split into a name and an e-mail address and the detail
# dictionary is filled from those; nothing happens when the string is
# missing, empty or contains no e-mail address.
#
# key - 'author' (default) or another prefix such as 'publisher'.
def _sync_author_detail(key='author')
  context = getContext()
  detail = context["#{key}_detail"]
  if detail and not detail.empty?
    name = detail['name']
    email = detail['email']
    has_name = (name and not name.empty?)
    # The e-mail must be checked for emptiness as well, otherwise an empty
    # address produces "name ()".
    has_email = (email and not email.empty?)

    if has_name and has_email
      context[key] = "#{name} (#{email})"
    elsif has_name
      context[key] = name
    elsif has_email
      context[key] = email
    end
  else
    author = context[key]
    return if not author or author.empty?
    emailmatch = author.match(/(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))/)
    # A plain name without an address leaves nothing to split.
    return if emailmatch.nil?
    email = emailmatch[1]
    # Remove the address and the now-empty or surrounding parentheses.
    # Non-destructive calls leave the string stored in the context untouched.
    author = author.gsub(email, '').gsub('()', '').strip
    author = author.gsub(/^\(/, '').gsub(/\)$/, '').strip
    context["#{key}_detail"] ||= FeedParserDict.new
    context["#{key}_detail"]['name'] = author
    context["#{key}_detail"]['email'] = email
  end
end
2037
+
2038
# Handles <subtitle> / <tagline> / <itunes:subtitle>: opens a content
# element named 'subtitle' whose default type is text/plain.
def _start_subtitle(attrsD)
  pushContent('subtitle', attrsD, 'text/plain', true)
end
alias :_start_tagline :_start_subtitle
alias :_start_itunes_subtitle :_start_subtitle

# Closes the 'subtitle' content element.
def _end_subtitle
  popContent('subtitle')
end
alias :_end_tagline :_end_subtitle
alias :_end_itunes_subtitle :_end_subtitle
2049
+
2050
# Handles <rights> / <dc:rights> / <copyright>: opens a content element
# named 'rights' whose default type is text/plain.
def _start_rights(attrsD)
  pushContent('rights', attrsD, 'text/plain', true)
end
alias :_start_dc_rights :_start_rights
alias :_start_copyright :_start_rights

# Closes the 'rights' content element.
def _end_rights
  popContent('rights')
end
alias :_end_dc_rights :_end_rights
alias :_end_copyright :_end_rights
2061
+
2062
# Handles <item> / <entry> / <product>: starts a new entry dictionary,
# resets the guid-is-link flag, records a non-empty rdf:about attribute as
# the entry id and applies the shared lastmod/href attribute handling.
def _start_item(attrsD)
  @entries << FeedParserDict.new
  push('item', false)
  @inentry = true
  @guidislink = false

  about = getAttribute(attrsD, 'rdf:about')
  getContext()['id'] = about if about && !about.empty?

  _cdf_common(attrsD)
end
alias :_start_entry :_start_item
alias :_start_product :_start_item
2076
+
2077
# Handles </item> / </entry>: pops the element and leaves entry context.
def _end_item
  pop('item')
  @inentry = false
end
alias :_end_entry :_end_item
2082
+
2083
# Handles <language> / <dc:language>: pushes 'language' expecting text.
def _start_dc_language(attrsD)
  push('language', true)
end
alias :_start_language :_start_dc_language

# Closes the language element and remembers its text as the current @lang.
def _end_dc_language
  @lang = pop('language')
end
alias :_end_language :_end_dc_language
2092
+
2093
# Handles <dc:publisher> / <webmaster>: pushes 'publisher' expecting text.
def _start_dc_publisher(attrsD)
  push('publisher', true)
end
alias :_start_webmaster :_start_dc_publisher

# Closes the publisher element and re-synchronises the publisher string with
# its detail dictionary.
def _end_dc_publisher
  pop('publisher')
  _sync_author_detail('publisher')
end
alias :_end_webmaster :_end_dc_publisher
2103
+
2104
# Handles <published> / <dcterms:issued> / <issued>: pushes 'published'
# expecting text.
def _start_published(attrsD)
  push('published', true)
end
alias :_start_dcterms_issued :_start_published
alias :_start_issued :_start_published

# Closes the element and saves the parsed date as 'published_parsed'.
def _end_published
  raw = pop('published')
  _save('published_parsed', parse_date(raw))
end
alias :_end_dcterms_issued :_end_published
alias :_end_issued :_end_published
2116
+
2117
# Handles <updated> / <modified> / <dcterms:modified> / <pubdate> /
# <dc:date>: pushes 'updated' expecting text.
def _start_updated(attrsD)
  push('updated', true)
end
alias :_start_modified :_start_updated
alias :_start_dcterms_modified :_start_updated
alias :_start_pubdate :_start_updated
alias :_start_dc_date :_start_updated

# Closes the element and saves the parsed date as 'updated_parsed'.
def _end_updated
  raw = pop('updated')
  _save('updated_parsed', parse_date(raw))
end
alias :_end_modified :_end_updated
alias :_end_dcterms_modified :_end_updated
alias :_end_pubdate :_end_updated
alias :_end_dc_date :_end_updated
2133
+
2134
# Handles <created> / <dcterms:created>: pushes 'created' expecting text.
def _start_created(attrsD)
  push('created', true)
end
alias :_start_dcterms_created :_start_created

# Closes the element and saves the parsed date as 'created_parsed'.
def _end_created
  raw = pop('created')
  _save('created_parsed', parse_date(raw))
end
alias :_end_dcterms_created :_end_created
2144
+
2145
# Handles <expirationdate>: pushes 'expired' expecting text.
def _start_expirationdate(attrsD)
  push('expired', true)
end

# Closes the element and saves the parsed date as 'expired_parsed'.
def _end_expirationdate
  raw = pop('expired')
  _save('expired_parsed', parse_date(raw))
end
2151
+
2152
# Handles <cc:license>: the licence URI is carried in the rdf:resource
# attribute, so the element is pushed, the attribute value is appended as
# its text and the element is popped again immediately.
def _start_cc_license(attrsD)
  push('license', true)
  value = getAttribute(attrsD, 'rdf:resource')
  # The stack is the @elementstack instance variable (as used by
  # _start_admin_errorreportsto); a bare `elementstack` is an undefined name
  # unless an accessor happens to exist.
  @elementstack[-1][2] << value if value and not value.empty?
  # Always pop: leaving 'license' on the stack when the attribute is absent
  # makes every later pop() of another element return early, because pop()
  # only acts when the top entry's name matches.
  pop('license')
end
2160
+
2161
# Handles <creativeCommons:license>: pushes 'license' expecting text.
def _start_creativecommons_license(attrsD)
  push('license', true)
end

# Handles </creativeCommons:license>: pops 'license'.
def _end_creativecommons_license
  pop('license')
end
2168
+
2169
# Appends a {term, scheme, label} tag to the context's 'tags' list unless an
# equal tag is already there. The list is created even when nothing is
# added; a tag whose three parts are all nil or empty is ignored.
def addTag(term, scheme, label)
  context = getContext()
  context['tags'] ||= []
  existing = context['tags']

  blank = lambda { |part| part.nil? || part.empty? }
  return if blank.call(term) && blank.call(scheme) && blank.call(label)

  tag = FeedParserDict.new({'term' => term, 'scheme' => scheme, 'label' => label})
  context['tags'] << tag unless existing.include?(tag)
end
2181
+
2182
# Handles <category> / <dc:subject> / <keywords>: records a tag built from
# the term, scheme (or domain) and label attributes, then pushes 'category'
# expecting text.
def _start_category(attrsD)
  $stderr << "entering _start_category with #{attrsD}\n" if $debug

  scheme = attrsD['scheme'] || attrsD['domain']
  addTag(attrsD['term'], scheme, attrsD['label'])
  push('category', true)
end
alias :_start_dc_subject :_start_category
alias :_start_keywords :_start_category
2193
+
2194
# Handles </itunes:keywords>: every whitespace-separated word of the element
# text becomes a tag in the iTunes scheme.
def _end_itunes_keywords
  keywords = pop('itunes_keywords')
  # pop() returns nil when the stack is empty or its top entry is a different
  # element; there is then nothing to split.
  return if keywords.nil?
  keywords.split.each do |term|
    addTag(term, 'http://www.itunes.com/', nil)
  end
end
2199
+
2200
# Handles <itunes:category>: records the 'text' attribute as a tag in the
# iTunes scheme, then pushes 'category' expecting text.
def _start_itunes_category(attrsD)
  label_text = attrsD['text']
  addTag(label_text, 'http://www.itunes.com/', nil)
  push('category', true)
end
2204
+
2205
# Handles </category> / </dc:subject> / </keywords> / </itunes:category>.
#
# Non-empty element text becomes the term of the most recent tag when that
# tag has no term yet; otherwise it is added as a new tag of its own.
def _end_category
  value = pop('category')
  return if value.nil? or value.empty?
  tags = getContext()['tags']
  # No trailing colon after the condition: the colon-as-`then` form is a
  # syntax error on Ruby 1.9 and later.
  if tags and not tags.empty? and not tags[-1]['term']
    tags[-1]['term'] = value
  else
    addTag(value, nil, nil)
  end
end
alias :_end_dc_subject :_end_category
alias :_end_keywords :_end_category
alias :_end_itunes_category :_end_category
2219
+
2220
# Handles <cloud>: stores a dictionary copy of the element's attributes as
# the context's 'cloud' value.
def _start_cloud(attrsD)
  cloud = FeedParserDict.new(attrsD)
  getContext()['cloud'] = cloud
end
2223
+
2224
# Handles <link> / <producturl>.
#
# Defaults rel to 'alternate' and type to 'text/html', normalises the URL
# attribute to 'href' and resolves it, and appends the attributes to the
# context's 'links' list. A rel="enclosure" link is also registered as an
# enclosure. When an href is present and the link is an HTML alternate, it
# becomes the context's 'link'; without an href the element is pushed so its
# text can supply the URL.
def _start_link(attrsD)
  attrsD['rel'] ||= 'alternate'
  attrsD['type'] ||= 'text/html'
  attrsD = itsAnHrefDamnIt(attrsD)
  has_href = attrsD.has_key?('href')
  attrsD['href'] = resolveURI(attrsD['href']) if has_href

  expectingText = @infeed || @inentry || @insource
  context = getContext()
  context['links'] ||= []
  context['links'] << FeedParserDict.new(attrsD)
  _start_enclosure(attrsD) if attrsD['rel'] == 'enclosure'

  if has_href
    if attrsD['rel'] == 'alternate' && @html_types.include?(mapContentType(attrsD['type']))
      context['link'] = attrsD['href']
    end
  else
    push('link', expectingText)
  end
end
alias :_start_producturl :_start_link
2248
+
2249
# Handles </link> / </producturl>: pops the link text and copies it into the
# open textinput and/or image dictionary.
def _end_link
  url = pop('link')
  context = getContext()
  context['textinput']['link'] = url if @intextinput
  context['image']['link'] = url if @inimage
end
alias :_end_producturl :_end_link
2260
+
2261
# Handles <guid>: the guid counts as a link unless the 'ispermalink'
# attribute is present with a value other than 'true'. Pushes 'id' expecting
# text.
def _start_guid(attrsD)
  permalink = attrsD['ispermalink'] || 'true'
  @guidislink = (permalink == 'true')
  push('id', true)
end
2265
+
2266
# Handles </guid>: records whether the guid doubles as the link, and saves
# it as the link when it does.
def _end_guid
  value = pop('id')
  _save('guidislink', (@guidislink and not getContext().has_key?('link')))
  # No trailing colon after the condition: the colon-as-`then` form is a
  # syntax error on Ruby 1.9 and later.
  if @guidislink
    # guid acts as link, but only if 'ispermalink' is not present or is 'true',
    # and only if the item doesn't already have a link element
    # (_save does not overwrite an existing truthy value)
    _save('link', value)
  end
end
2275
+
2276
+
2277
# Handles <title> / <dc:title> / <media:title>: opens a text/plain content
# element named 'title'; text is expected only inside a feed, entry or
# source.
def _start_title(attrsD)
  wants_text = @infeed || @inentry || @insource
  pushContent('title', attrsD, 'text/plain', wants_text)
end
alias :_start_dc_title :_start_title
alias :_start_media_title :_start_title
2282
+
2283
# Handles </title> / </dc:title> / </media:title>: closes the content
# element and copies its text into the open textinput, or else the open
# image.
def _end_title
  title = popContent('title')
  context = getContext()
  if @intextinput then context['textinput']['title'] = title
  elsif @inimage then context['image']['title'] = title
  end
end
alias :_end_dc_title :_end_title
alias :_end_media_title :_end_title
2294
+
2295
# Handles <description>. When the context already has a summary, the
# element is treated as content (and @summaryKey records that); otherwise a
# text/html content element named 'description' is opened.
def _start_description(attrsD)
  unless getContext().has_key?('summary')
    return pushContent('description', attrsD, 'text/html', @infeed || @inentry || @insource)
  end

  @summaryKey = 'content'
  _start_content(attrsD)
end
2304
+
2305
# Handles <abstract>: opens a text/plain content element named
# 'description'; text is expected only inside a feed, entry or source.
def _start_abstract(attrsD)
  wants_text = @infeed || @inentry || @insource
  pushContent('description', attrsD, 'text/plain', wants_text)
end
2308
+
2309
# Handles </description> / </abstract>.
#
# When the matching start handler redirected the element to content
# (@summaryKey == 'content'), the content element is closed. Otherwise the
# 'description' content element is closed and its text is copied into the
# open textinput, or else the open image. @summaryKey is reset either way.
def _end_description
  if @summaryKey == 'content'
    _end_content()
  else
    value = popContent('description')
    context = getContext()
    if @intextinput
      context['textinput']['description'] = value
    # No trailing colon after the condition: the colon-as-`then` form is a
    # syntax error on Ruby 1.9 and later.
    elsif @inimage
      context['image']['description'] = value
    end
  end
  @summaryKey = nil
end
alias :_end_abstract :_end_description
2324
+
2325
# Handles <info> / <feedburner:browserFriendly>: opens a text/plain content
# element named 'info'.
def _start_info(attrsD)
  pushContent('info', attrsD, 'text/plain', true)
end
alias :_start_feedburner_browserfriendly :_start_info

# Closes the 'info' content element.
def _end_info
  popContent('info')
end
alias :_end_feedburner_browserfriendly :_end_info
2334
+
2335
# Handles <generator>: normalises and resolves the element's URL attribute
# (when it has attributes), stores the attributes as the context's
# 'generator_detail' and pushes 'generator' expecting text.
def _start_generator(attrsD)
  if attrsD && !attrsD.empty?
    attrsD = itsAnHrefDamnIt(attrsD)
    attrsD['href'] = resolveURI(attrsD['href']) if attrsD.has_key?('href')
  end
  detail = FeedParserDict.new(attrsD)
  getContext()['generator_detail'] = detail
  push('generator', true)
end
2345
+
2346
# Handles </generator>: stores the element text as the generator's name when
# the context has a 'generator_detail' dictionary.
def _end_generator
  generator_name = pop('generator')
  context = getContext()
  context['generator_detail']['name'] = generator_name if context.has_key?('generator_detail')
end
2353
+
2354
# Handles <admin:generatorAgent>: the generator URI is carried in the
# rdf:resource attribute, so the element is pushed, the attribute value is
# appended as its text, and the element is popped again immediately. The
# URI is also stored as the href of the context's 'generator_detail'.
def _start_admin_generatoragent(attrsD)
  push('generator', true)
  value = getAttribute(attrsD, 'rdf:resource')
  if value and not value.empty?
    # The stack is the @elementstack instance variable (as used by
    # _start_admin_errorreportsto); a bare `elementstack` is an undefined
    # name unless an accessor happens to exist.
    @elementstack[-1][2] << value
  end
  pop('generator')
  getContext()['generator_detail'] = FeedParserDict.new({'href' => value})
end
2363
+
2364
# Handles <admin:errorReportsTo>: pushes the element, appends a non-empty
# rdf:resource attribute value as its text and pops it again immediately.
def _start_admin_errorreportsto(attrsD)
  push('errorreportsto', true)
  resource = getAttribute(attrsD, 'rdf:resource')
  @elementstack[-1][2] << resource if resource && !resource.empty?
  pop('errorreportsto')
end
2372
+
2373
# Handles <summary> / <itunes:summary>. When the context already has a
# summary, the element is treated as content; otherwise a text/plain content
# element named 'summary' is opened. @summaryKey records which was chosen.
def _start_summary(attrsD)
  if getContext().has_key?('summary')
    @summaryKey = 'content'
    return _start_content(attrsD)
  end

  @summaryKey = 'summary'
  pushContent(@summaryKey, attrsD, 'text/plain', true)
end
alias :_start_itunes_summary :_start_summary
2384
+
2385
# Handles </summary> / </itunes:summary>: closes either the content element
# (when the start handler redirected to content) or the summary content
# element, then resets @summaryKey.
def _end_summary
  # No trailing colon after the condition: the colon-as-`then` form is a
  # syntax error on Ruby 1.9 and later.
  if @summaryKey == 'content'
    _end_content()
  else
    popContent(@summaryKey || 'summary')
  end
  @summaryKey = nil
end
alias :_end_itunes_summary :_end_summary
2394
+
2395
# Handles <enclosure>: normalises the URL attribute to 'href', appends the
# attributes to the context's 'enclosures' list and, when the context has no
# id yet, uses a non-empty href as the id.
def _start_enclosure(attrsD)
  attrsD = itsAnHrefDamnIt(attrsD)
  context = getContext()
  context['enclosures'] ||= []
  context['enclosures'] << FeedParserDict.new(attrsD)

  href = attrsD['href']
  context['id'] = href if href && !href.empty? && !context['id']
end
2407
+
2408
# Handles <source>: values parsed from here on go to the source data.
def _start_source(attrsD)
  @insource = true
end

# Handles </source>: leaves source context, stores a deep copy of the
# collected source data as the context's 'source' and empties the collector.
def _end_source
  @insource = false
  snapshot = Marshal.load(Marshal.dump(@sourcedata)) # deepcopy
  getContext()['source'] = snapshot
  @sourcedata.clear()
end
2417
+
2418
# Handles <content>: opens a text/plain content element named 'content' and
# records a non-empty 'src' attribute in @contentparams.
def _start_content(attrsD)
  pushContent('content', attrsD, 'text/plain', true)
  src = attrsD['src']
  # No trailing colon after the condition: the colon-as-`then` form is a
  # syntax error on Ruby 1.9 and later.
  if src and not src.empty?
    @contentparams['src'] = src
  end
  # NOTE(review): pushContent has already pushed 'content'; this adds a
  # second stack entry. Kept exactly as the original does it.
  push('content', true)
end
2426
+
2427
# Handles <prodlink>: content element with default type text/html.
def _start_prodlink(attrsD)
  default_type = 'text/html'
  pushContent('content', attrsD, default_type, true)
end

# Handles <body> / <xhtml:body>: content element with default type
# application/xhtml+xml.
def _start_body(attrsD)
  default_type = 'application/xhtml+xml'
  pushContent('content', attrsD, default_type, true)
end
alias :_start_xhtml_body :_start_body

# Handles <content:encoded> / <fullitem>: content element with default type
# text/html.
def _start_content_encoded(attrsD)
  default_type = 'text/html'
  pushContent('content', attrsD, default_type, true)
end
alias :_start_fullitem :_start_content_encoded
2440
+
2441
# Closes a content element. When its type is text/plain or one of the HTML
# types, the text is also saved as the context's 'description' (only if no
# description is stored yet, since _save does not overwrite).
def _end_content
  # Decide before popping: popContent clears @contentparams.
  copyToDescription = (['text/plain'] + @html_types).include? mapContentType(@contentparams['type'])
  value = popContent('content')
  if copyToDescription
    _save('description', value)
  end
end
# The aliases belong after the method definition. Placed inside the body
# they are only created once _end_content has run for the first time.
alias :_end_body :_end_content
alias :_end_xhtml_body :_end_content
alias :_end_content_encoded :_end_content
alias :_end_fullitem :_end_content
alias :_end_prodlink :_end_content
2453
+
2454
# Handles <itunes:image> / <itunes:link>: pushes 'itunes_image' and replaces
# the context's 'image' with a dictionary holding the href attribute.
def _start_itunes_image(attrsD)
  push('itunes_image', false)
  image = FeedParserDict.new({'href' => attrsD['href']})
  getContext()['image'] = image
end
alias :_start_itunes_link :_start_itunes_image
2459
+
2460
# Handles </itunes:block>: stores true in the context's 'itunes_block' when
# the unstripped element text is exactly 'yes', false otherwise.
def _end_itunes_block
  text = pop('itunes_block', false)
  getContext()['itunes_block'] = (text == 'yes')
end

# Handles </itunes:explicit>: stores true in the context's 'itunes_explicit'
# when the unstripped element text is exactly 'yes', false otherwise.
def _end_itunes_explicit
  text = pop('itunes_explicit', false)
  getContext()['itunes_explicit'] = (text == 'yes')
end
2469
+
2470
+
2471
# ISO-8601 date parsing routines written by Fazal Majid.
# The ISO 8601 standard is very convoluted and irregular - a full ISO 8601
# parser is beyond the scope of feedparser and the current Time.iso8601
# method does not work.
# A single regular expression cannot parse ISO 8601 date formats into groups
# as the standard is highly irregular (for instance is 030104 2003-01-04 or
# 0301-04-01), so we use templates instead.
# Please note the order in templates is significant because we need a
# greedy match.
#
# Parses a variety of ISO-8601-compatible formats like 20040105.
#
# dateString - the date text to parse.
#
# Returns a UTC Time, or nil when no date or time could be recognised or the
# recognised fields do not form a valid date.
def _parse_date_iso8601(dateString)
  # Date templates, tried in order; each pairs a pattern with the names of
  # its capture groups.
  # FIXME The century regexp may not work ('\d\d$' says "two digits at end
  # of line" but the time pattern is then appended to it).
  date_templates = [
    # Both dashes are optional so compact dates such as 20040105 match here
    # instead of falling through to the year+ordinal template.
    ['^(\d{4})-?([01]\d)-?([0123]\d)', ['year', 'month', 'day']],
    ['^(\d{4})-([01]\d)',              ['year', 'month']],
    ['^(\d{4})-?([0123]\d\d)',         ['year', 'ordinal']],
    ['^(\d\d)-?([01]\d)-?([0123]\d)',  ['year', 'month', 'day']],
    ['^(\d\d)-?([0123]\d\d)',          ['year', 'ordinal']],
    ['^(\d{4})',                       ['year']],
    ['-(\d\d)-?([01]\d)',              ['year', 'month']],
    ['-([0123]\d\d)',                  ['ordinal']],
    ['-(\d\d)',                        ['year']],
    ['--([01]\d)-?([0123]\d)',         ['month', 'day']],
    ['--([01]\d)',                     ['month']],
    ['---([0123]\d)',                  ['day']],
    ['(\d\d$)',                        ['century']],
    ['',                               []]
  ]
  # Optional time-of-day suffix appended to every template. The outer group
  # and the groups around the optional ':' parts are non-capturing ('(?:')
  # so that the captures line up one-to-one with time_fields.
  time_pattern = '(?:T?(\d\d):(\d\d)(?::(\d\d))?([+-](\d\d)(?::(\d\d))?|Z)?)?'
  time_fields = ['hour', 'minute', 'second', 'tz', 'tzhour', 'tzmin']

  m = nil
  param_keys = []
  date_templates.each do |pattern, fields|
    $stderr << "Trying iso8601 regexp: #{pattern + time_pattern}\n" if $debug
    param_keys = fields + time_fields
    m = dateString.match(Regexp.new(pattern + time_pattern))
    break if m
  end
  # The last template matches the empty string; that means nothing was found.
  return if m.nil? or (m.begin(0).zero? and m.end(0).zero?)

  params = {}
  param_keys.each_with_index { |key, i| params[key] = m[i + 1] }

  now = Time.now.utc
  ordinal = params['ordinal'].nil? ? nil : params['ordinal'].to_i

  year = params['year']
  if year.nil? or year.empty?
    year = now.year
  elsif year.length == 2
    # ISO 8601 assumes current century, i.e. 93 -> 2093, NOT 1993
    year = 100 * (now.year / 100) + year.to_i
  else
    year = year.to_i
  end

  month = params['month']
  if month.nil? or month.empty?
    # An ordinal (day of year) determines the month; otherwise fall back to
    # the current month.
    month = ordinal ? Date.ordinal(year, ordinal).month : now.month
  else
    month = month.to_i
  end

  day = params['day']
  if day.nil? or day.empty?
    if ordinal
      day = Date.ordinal(year, ordinal).day
    elsif params['century'] or params['year'] or params['month']
      day = 1
    else
      day = now.day
    end
  else
    day = day.to_i
  end

  # special case of the century - is the first year of the 21st century
  # 2000 or 2001 ? The debate goes on...
  if params['century']
    year = (params['century'].to_i - 1) * 100 + 1
  end

  # in ISO 8601 most fields are optional; nil.to_i is 0
  result = Time.utc(year, month, day,
                    params['hour'].to_i, params['minute'].to_i, params['second'].to_i)

  # Convert local time to UTC by shifting the finished Time, so the offset
  # applies to hours/minutes and correctly rolls over days, months and years.
  tz = params['tz']
  if tz and not tz.empty? and tz != 'Z'
    offset = params['tzhour'].to_i * 3600 + params['tzmin'].to_i * 60
    # tz[0, 1] is a one-character String on every Ruby version (tz[0] is an
    # Integer on Ruby 1.8). A zone behind UTC ('-') means UTC is later.
    result = (tz[0, 1] == '-') ? result + offset : result - offset
  end
  return result
rescue ArgumentError
  # Out-of-range fields (e.g. month 00 or ordinal 400) are not a valid date.
  return nil
end
2604
+
2605
# Parse a string according to the OnBlog 8-bit date format.
# 8-bit date handling routines written by ytrewq1.
#
# The text is rewritten as a W3DTF string with a fixed +09:00 offset and
# handed to _parse_date_w3dtf. Returns nil when the format does not match.
def _parse_date_onblog(dateString)
  year_mark  = u("년") # b3e2 in euc-kr
  month_mark = u("월") # bff9 in euc-kr
  day_mark   = u("일") # c0cf in euc-kr

  pattern = /(\d{4})#{year_mark}\s+(\d{2})#{month_mark}\s+(\d{2})#{day_mark}\s+(\d{2}):(\d{2}):(\d{2})/
  parts = pattern.match(dateString)
  return unless parts

  year, month, day, hour, minute, second = parts.captures
  w3dtfdate = "#{year}-#{month}-#{day}T#{hour}:#{minute}:#{second}+09:00"

  $stderr << "OnBlog date parsed as: %s\n" % w3dtfdate if $debug
  return _parse_date_w3dtf(w3dtfdate)
end
2623
+
2624
# Parse a string according to the Nate 8-bit date format.
# 8-bit date handling routines written by ytrewq1.
#
# The hour is increased by 12 for the PM marker, the text is rewritten as a
# W3DTF string with a fixed +09:00 offset and handed to _parse_date_w3dtf.
# Returns nil when the format does not match.
def _parse_date_nate(dateString)
  am_mark = u("오전") # bfc0 c0fc in euc-kr
  pm_mark = u("오후") # bfc0 c8c4 in euc-kr

  pattern = /(\d{4})-(\d{2})-(\d{2})\s+(#{am_mark}|#{pm_mark})\s+(\d{0,2}):(\d{0,2}):(\d{0,2})/
  parts = pattern.match(dateString)
  return unless parts

  year, month, day, ampm, hour_text, minute, second = parts.captures
  hour = hour_text.to_i
  hour += 12 if ampm == pm_mark
  hour = hour.to_s.rjust(2, '0')

  w3dtfdate = "#{year}-#{month}-#{day}T#{hour}:#{minute}:#{second}+09:00"
  $stderr << "Nate date parsed as: %s\n" % w3dtfdate if $debug
  return _parse_date_w3dtf(w3dtfdate)
end
2643
+
2644
# Parse a string in the MS SQL date format (YYYY-MM-DD hh:mm:ss with an
# optional fractional part, which is ignored).
#
# The text is rewritten as a W3DTF string with a fixed +09:00 offset and
# handed to _parse_date_w3dtf. Returns nil when the format does not match.
def _parse_date_mssql(dateString)
  pattern = /(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})(\.\d+)?/
  parts = pattern.match(dateString)
  return unless parts

  year, month, day, hour, minute, second = parts.captures
  w3dtfdate = "#{year}-#{month}-#{day}T#{hour}:#{minute}:#{second}+09:00"
  $stderr << "MS SQL date parsed as: %s\n" % w3dtfdate if $debug
  return _parse_date_w3dtf(w3dtfdate)
end
2653
+
2654
# Parse a string according to a Greek 8-bit date format.
#
# The Greek weekday and month abbreviations are translated to English, the
# text is rebuilt as an RFC 822 date and handed to _parse_date_rfc822.
# Returns nil when the format does not match or the weekday or month
# abbreviation is not in the tables below.
def _parse_date_greek(dateString)
  # Unicode strings for Greek date strings
  greek_months = {
    u("Ιαν") => u("Jan"),       # c9e1ed in iso-8859-7
    u("Φεβ") => u("Feb"),       # d6e5e2 in iso-8859-7
    u("Μάώ") => u("Mar"),       # ccdcfe in iso-8859-7
    u("Μαώ") => u("Mar"),       # cce1fe in iso-8859-7
    u("Απρ") => u("Apr"),       # c1f0f1 in iso-8859-7
    u("Μάι") => u("May"),       # ccdce9 in iso-8859-7
    u("Μαϊ") => u("May"),       # cce1fa in iso-8859-7
    u("Μαι") => u("May"),       # cce1e9 in iso-8859-7
    u("Ιούν") => u("Jun"), # c9effded in iso-8859-7
    u("Ιον") => u("Jun"),       # c9efed in iso-8859-7
    u("Ιούλ") => u("Jul"), # c9effdeb in iso-8859-7
    u("Ιολ") => u("Jul"),       # c9f9eb in iso-8859-7
    u("Αύγ") => u("Aug"),       # c1fde3 in iso-8859-7
    u("Αυγ") => u("Aug"),       # c1f5e3 in iso-8859-7
    u("Σεπ") => u("Sep"),       # d3e5f0 in iso-8859-7
    u("Οκτ") => u("Oct"),       # cfeaf4 in iso-8859-7
    u("Νοέ") => u("Nov"),       # cdefdd in iso-8859-7
    u("Νοε") => u("Nov"),       # cdefe5 in iso-8859-7
    u("Δεκ") => u("Dec"),       # c4e5ea in iso-8859-7
  }

  greek_wdays = {
    u("Κυρ") => u("Sun"), # caf5f1 in iso-8859-7
    u("Δευ") => u("Mon"), # c4e5f5 in iso-8859-7
    u("Τρι") => u("Tue"), # d4f1e9 in iso-8859-7
    u("Τετ") => u("Wed"), # d4e5f4 in iso-8859-7
    u("Πεμ") => u("Thu"), # d0e5ec in iso-8859-7
    u("Παρ") => u("Fri"), # d0e1f1 in iso-8859-7
    u("Σαβ") => u("Sat"), # d3e1e2 in iso-8859-7
  }

  greek_date_format = /([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)/

  m = greek_date_format.match(dateString)
  return unless m
  wday = greek_wdays[m[1]]
  month = greek_months[m[3]]
  # Hash#[] returns nil for an unknown key instead of raising, so a rescue
  # never fires here; the lookups have to be checked explicitly.
  return nil if wday.nil? or month.nil?
  rfc822date = "#{wday}, #{m[2]} #{month} #{m[4]} #{m[5]}:#{m[6]}:#{m[7]} #{m[8]}"
  $stderr << "Greek date parsed as: #{rfc822date}\n" if $debug
  return _parse_date_rfc822(rfc822date)
end
2703
+
2704
# Parse a string according to a Hungarian 8-bit date format
# ("YYYY-<month name>-DDThh:mm+zz:zz").
#
# The month name is translated to its two-digit number, day and hour are
# zero-padded, seconds are set to "00", and the resulting W3DTF string is
# passed to _parse_date_w3dtf.
#
# Returns nil when dateString does not have the expected layout or the month
# name is not one of the known Hungarian names.
def _parse_date_hungarian(dateString)
  hungarian_date_format_re = /(\d{4})-([^-]+)-(\d{0,2})T(\d{0,2}):(\d{2})((\+|-)(\d{0,2}:\d{2}))/
  m = hungarian_date_format_re.match(dateString)
  return unless m

  # Unicode strings for Hungarian date strings
  hungarian_months = {
    u("január") =>   u("01"),  # e1 in iso-8859-2
    u("februári") => u("02"),  # e1 in iso-8859-2
    u("március") =>  u("03"),  # e1 in iso-8859-2
    u("április") =>  u("04"),  # e1 in iso-8859-2
    u("máujus") =>   u("05"),  # e1 in iso-8859-2
    u("június") =>   u("06"),  # fa in iso-8859-2
    u("július") =>   u("07"),  # fa in iso-8859-2
    u("augusztus") =>     u("08"),
    u("szeptember") =>    u("09"),
    u("október") =>  u("10"),  # f3 in iso-8859-2
    u("november") =>      u("11"),
    u("december") =>      u("12"),
  }

  # Hash#[] answers nil for an unknown key (it does not raise), so an
  # unknown month name has to be rejected explicitly.
  month = hungarian_months[m[2]]
  return if month.nil?

  day = m[3].rjust(2, '0')
  hour = m[4].rjust(2, '0')

  w3dtfdate = "#{m[1]}-#{month}-#{day}T#{hour}:#{m[5]}:00#{m[6]}"
  $stderr << "Hungarian date parsed as: #{w3dtfdate}\n" if $debug
  return _parse_date_w3dtf(w3dtfdate)
end
2737
+
2738
# Split num into the part that stays in the current unit and the carry for
# the next larger unit, e.g. 61 seconds -> [1, 1] (1 second, carry 1 minute).
#
# Returns a two-element array: [num % modulus, num / modulus].
def rollover(num, modulus)
  remainder = num % modulus
  carry = num / modulus
  [remainder, carry]
end
2741
+
2742
# Returns num / modulus, unless that quotient is zero, in which case num
# itself is returned unchanged.
def set_self(num, modulus)
  quotient = num / modulus
  quotient == 0 ? num : quotient
end
2749
# W3DTF-style date parsing
# FIXME shouldn't it be "W3CDTF"?
#
# Accepts "YYYY", "YYYY-MM", "YYYY-MM-DD" and "YYYY-MM-DDThh:mm:ssTZ" (TZ is
# "Z" or "+hh:mm"/"-hh:mm"); the dashes in the date part are optional.
# Missing month/day default to 1, a missing time to 00:00:00 and a missing
# zone to UTC. Out-of-range fields (60 seconds, 24 hours, day 31 of a short
# month, month 13..19) roll over into the next larger unit.
#
# Returns a Time in UTC, or nil when dateString does not start with a
# four-digit year.
def _parse_date_w3dtf(dateString)
  # Ruby's Time docs claim w3cdtf is an alias for iso8601 which is an alias for xmlschema
  # Whatever it is, it doesn't work.  This has been fixed in Ruby 1.9 and
  # in Ruby on Rails, but not really. They don't fix the 25 hour or 61 minute or 61 second rollover and fail in other ways.

  m = dateString.match(/^(\d{4})-?(?:(?:([01]\d)-?(?:([0123]\d)(?:T(\d\d):(\d\d):(\d\d)([+-]\d\d:\d\d|Z))?)?)?)?/)
  return unless m

  # Year, month and day become integers; a missing (or zero) field becomes 1.
  year, month, day = m[1..3].map { |s| s.to_i == 0 ? 1 : s.to_i }
  # Hour, minute and second become integers; a missing field becomes 0.
  hour, minute, second = m[4..6].map { |s| s.to_i }
  zone = m[7] # stays a String (or nil when no time part was given)

  # Rollover times. 0 minutes and 61 seconds -> 1 minute and 1 second
  second, carry = rollover(second, 60)
  minute, carry = rollover(minute + carry, 60)
  hour, carry = rollover(hour + carry, 24)
  day += carry

  if month > 12
    month, carry = rollover(month, 12)
    month = 12 if month == 0
    year += carry
  end

  # Push surplus days into the following month(s).
  num_days = Time.days_in_month(month, year)
  while day > num_days
    day -= num_days
    month += 1
    if month > 12
      year += 1
      month = set_self(month, 12)
    end
    num_days = Time.days_in_month(month, year)
  end

  # The fields are local to `zone`; subtracting the zone's offset from the
  # same wall-clock reading taken as UTC yields the real UTC instant.
  offset = Time.zone_offset(zone || "UTC")
  return Time.utc(year, month, day, hour, minute, second) - offset
end
2798
+
2799
# Parse an RFC822, RFC1123, RFC2822 or asctime-style date.
#
# Before parsing, two quirks of a proprietary format are normalised: a
# spelled-out month name in third position is cut to its three-letter
# abbreviation, and the two-letter zones AT/ET/CT/MT/PT are replaced by
# three-letter equivalents. Dates without a time, without seconds or in
# asctime layout are rebuilt into a full RFC 2822 string first.
#
# The caller's string is never modified. Returns a Time in UTC; raises
# (ArgumentError from Time.rfc2822) when the string cannot be parsed.
def _parse_date_rfc822(dateString)
  # These first few lines are to fix up the stupid proprietary format from Disney
  unknown_timezones = { 'AT' => 'EDT', 'ET' => 'EST',
                        'CT' => 'CST', 'MT' => 'MST',
                        'PT' => 'PST'
  }

  # Work on a copy: the same string object is handed to the other date
  # handlers by parse_date, so it must not be edited in place.
  dateString = dateString.dup

  mon = dateString.split[2]
  if mon and mon.length > 3 and Time::RFC2822_MONTH_NAME.include?(mon[0..2])
    dateString.sub!(mon, mon[0..2])
  end
  if dateString[-3..-1] != "GMT" and unknown_timezones[dateString[-2..-1]]
    dateString[-2..-1] = unknown_timezones[dateString[-2..-1]]
  end
  # Okay, the Disney date format should be fixed up now.

  rebuild = false

  # The optional time part is a non-capturing group so that the captures
  # line up one-to-one with the eight names below.
  rfc = dateString.match(/([A-Za-z]{3}), ([0123]\d) ([A-Za-z]{3}) (\d{4})(?: (\d\d):(\d\d)(?::(\d\d))? ([A-Za-z]{3}))?/)
  if rfc and rfc.captures.include?(nil)
    # Time or seconds are missing: fill the gaps with zeros and GMT.
    dow, day, mon, year, hour, min, sec, tz = rfc.captures
    hour, min, sec = [hour, min, sec].map { |e| e.to_s.rjust(2, '0') }
    tz ||= "GMT"
    rebuild = true
  end

  asctime_match = dateString.match(/([A-Za-z]{3}) ([A-Za-z]{3}) (\d?\d) (\d\d):(\d\d):(\d\d) ([A-Za-z]{3}) (\d\d\d\d)/)
  if asctime_match
    # Month-abbr dayofmonth hour:minute:second year
    dow, mon, day, hour, min, sec, tz, year = asctime_match.captures
    day = day.to_s.rjust(2, '0')
    rebuild = true
  end

  if rebuild
    ds = "#{dow}, #{day} #{mon} #{year} #{hour}:#{min}:#{sec} #{tz}"
  else
    ds = dateString
  end
  return Time.rfc2822(ds).utc
end
2835
+
2836
# Parse a date in "yyyy/mm/dd hh:mm:ss TTT" format, optionally preceded by
# a day of the week, e.g. "Fri, 2006/09/15 08:19:53 EDT".
#
# Parsing is delegated to Time.parse; the result is converted to UTC.
def _parse_date_perforce(aDateString) # FIXME not in 4.1?
  parsed = Time.parse(aDateString)
  parsed.utc
end
2842
+
2843
# Flatten a Time into a nine-element array:
#   [year, month, mday, hour, min, sec, weekday, yday, isdst]
#
# weekday is shifted so that Monday is 0 and Sunday is 6 (Ruby's own wday
# counts from Sunday = 0); isdst is 1 or 0 instead of true/false.
#
# NOTE leave the error handling to parse_date: a nil argument raises
# NoMethodError here on purpose.
def extract_tuple(atime)
  numeric_fields = [atime.year, atime.month, atime.mday, atime.hour,
                    atime.min, atime.sec, (atime.wday - 1) % 7, atime.yday]
  # Coerce on the array that is actually returned (mapping a temporary
  # slice in place would have no effect on the result).
  tuple = numeric_fields.map { |field| field.to_i }
  tuple << (atime.isdst ? 1 : 0)
  return tuple
end
2854
+
2855
# Try every handler listed in @date_handlers, in order, on dateString.
#
# Each handler is invoked by name with the string; its result is converted
# with extract_tuple. A handler that raises, or whose result cannot be
# converted (for example nil), is skipped.
#
# Returns the tuple from the first handler that succeeds, or nil when none
# does. Only StandardError is treated as "handler failed"; interrupts and
# exit requests are allowed to propagate.
def parse_date(dateString)
  @date_handlers.each do |handler|
    begin
      $stderr << "Trying date_handler #{handler}\n" if $debug
      return extract_tuple(send(handler, dateString))
    rescue StandardError => e
      $stderr << "#{handler} raised #{e}\n" if $debug
    end
  end
  return nil
end
2867
+
2868
+ end # End FeedParserMixin
2869
+
2870
# SAX handler used for well-formed feeds.
#
# It receives the parser callbacks (elements, character data, comments,
# namespace declarations, errors) and forwards them to the generic
# unknown_starttag / unknown_endtag / handle_data / handle_comment /
# trackNamespace methods that FeedParserMixin is expected to provide.
#
# Element names arrive as "namespace-uri;localname" (or just "localname");
# both startElement and endElement translate them to "prefix:localname",
# lower-cased, using @matchnamespaces.
#
# bozo is set to true and exc holds the exception once the parser reports
# an error.
class StrictFeedParser < XML::SAX::HandlerBase # expat
  include FeedParserMixin

  attr_accessor :bozo, :entries, :feeddata, :exc

  def initialize(baseuri, baselang, encoding)
    $stderr << "trying StrictFeedParser\n" if $debug
    startup(baseuri, baselang, encoding)
    @bozo = false
    @exc = nil
    super()
  end

  # Returns [system id, line number] as reported by the document locator.
  def getPos
    [@locator.getSystemId, @locator.getLineNumber]
  end

  # Returns the attributes as an array of [name, value] pairs.
  def getAttrs(attrs)
    # Exclusive range: valid indices are 0 .. getLength - 1.
    (0...attrs.getLength).map do |i|
      [attrs.getName(i), attrs.getValue(i)]
    end
  end

  def setDocumentLocator(loc)
    @locator = loc
  end

  def startDoctypeDecl(name, pub_sys, long_name, uri)
    #Nothing is done here. What could we do that is neat and useful?
  end

  def startNamespaceDecl(prefix, uri)
    trackNamespace(prefix, uri)
  end

  def endNamespaceDecl(prefix)
  end

  def startElement(name, attrs)
    unknown_starttag(qualified_tag_name(name), attrs)
  end

  def character(text, start, length)
    #handle_data(CGI.unescapeHTML(text))
    handle_data(text)
  end
  # expat provides "character" not "characters"!
  alias :characters :character # Just in case.

  def startCdata(content)
    handle_data(content)
  end

  def endElement(name)
    # Must produce exactly the name startElement produced for the same
    # element, otherwise the end tag would not pair with its start tag.
    unknown_endtag(qualified_tag_name(name))
  end

  def comment(comment)
    handle_comment(comment)
  end

  def entityDecl(*foo)
  end

  def unparsedEntityDecl(*foo)
  end

  # Records a recoverable parser error.
  def error(exc)
    @bozo = true
    @exc = exc
  end

  # Records the error, then re-raises it to abort the parse.
  def fatalError(exc)
    error(exc)
    raise exc
  end

  private

  # Turns "namespace-uri;localname" into the lower-cased "prefix:localname"
  # (or plain "localname" when the namespace has no non-empty known prefix).
  def qualified_tag_name(name)
    name =~ /^(([^;]*);)?(.+)$/ # Snag namespaceuri from name
    namespaceuri = ($2 || '').downcase
    localname = $3
    if /backend\.userland\.com\/rss/ =~ namespaceuri
      # match any backend.userland.com namespace
      namespaceuri = 'http://backend.userland.com/rss'
    end
    prefix = @matchnamespaces[namespaceuri]
    # No need to raise UndeclaredNamespace, Expat does that for us with
    # "unbound prefix (XMLParserError)"
    if prefix and not prefix.empty?
      localname = prefix + ':' + localname
    end
    localname.downcase
  end
end
2968
+
2969
# Forgiving, SGML-based parser used when the strict XML parser cannot be
# used or has failed.
#
# `feed` / `feed=` are redefined to read and write @feeddata; the
# superclass's own `feed` method stays reachable as `sgml_feed`.
class LooseFeedParser < BetterSGMLParser
  include FeedParserMixin
  # We write the methods that were in BaseHTMLProcessor in the python code
  # in here directly. We do this because if we inherited from
  # BaseHTMLProcessor but then included from FeedParserMixin, the methods
  # of Mixin would overwrite the methods we inherited from
  # BaseHTMLProcessor. This is exactly the opposite of what we want to
  # happen!

  attr_accessor :encoding, :bozo, :feeddata, :entries, :namespacesInUse

  Elements_No_End_Tag = ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
    'img', 'input', 'isindex', 'link', 'meta', 'param']
  New_Declname_Re = /[a-zA-Z][-_.a-zA-Z0-9:]*\s*/
  alias :sgml_feed :feed # feed needs to mapped to feeddata, not the SGMLParser method feed. I think.
  def feed
    @feeddata
  end
  def feed=(data)
    @feeddata = data
  end

  def initialize(baseuri, baselang, encoding)
    startup(baseuri, baselang, encoding)
    super() # Keep the parentheses! No touchy.
  end

  def reset
    @pieces = []
    super
  end

  # Pre-processes the raw document and hands it to the SGML parser.
  #
  # Note that `data` is modified in place by the substitutions below.
  def parse(data)
    # Escape "<!" unless it starts a DOCTYPE, a comment or a marked section.
    data.gsub!(/<!((?!DOCTYPE|--|\[))/i, '&lt;!\1')
    # Expand self-closing tags (<x/>) into <x></x>, except for elements
    # that never have an end tag.
    data.gsub!(/<([^<\s]+?)\s*\/>/) do |tag|
      clean = tag[1..-3].strip
      if Elements_No_End_Tag.include?clean
        tag
      else
        '<'+clean+'></'+clean+'>'
      end
    end

    data.gsub!(/&#39;/, "'")
    # &#34; is the double quote character (&#39; is the single quote).
    data.gsub!(/&#34;/, '"')
    if @encoding and not @encoding.empty? # FIXME unicode check type(u'')
      data = uconvert(data,'utf-8',@encoding)
    end
    sgml_feed(data) # see the alias above
  end


  # Normalises numeric references for < > & " ' to named entities and, when
  # the current content type is present and does not end in "xml", decodes
  # those entities to literal characters. `data` is modified in place and
  # also returned.
  def decodeEntities(element, data)
    data.gsub!('&#60;', '&lt;')
    data.gsub!('&#x3c;', '&lt;')
    data.gsub!('&#62;', '&gt;')
    data.gsub!('&#x3e;', '&gt;')
    data.gsub!('&#38;', '&amp;')
    data.gsub!('&#x26;', '&amp;')
    data.gsub!('&#34;', '&quot;')
    data.gsub!('&#x22;', '&quot;')
    data.gsub!('&#39;', '&apos;')
    data.gsub!('&#x27;', '&apos;')
    if @contentparams.has_key? 'type' and not ((@contentparams['type'] || 'xml') =~ /xml$/u)
      data.gsub!('&lt;', '<')
      data.gsub!('&gt;', '>')
      data.gsub!('&amp;', '&')
      data.gsub!('&quot;', '"')
      data.gsub!('&apos;', "'")
    end
    return data
  end
end
3042
+
3043
# Rewrite every relative URI found in the listed element/attribute pairs of
# htmlSource so that it is resolved against baseURI.
#
# The markup is parsed with Hpricot; attributes that are missing, empty or
# already absolute are left alone. The `encoding` argument is not used.
# Returns the resulting HTML as a string.
def FeedParser.resolveRelativeURIs(htmlSource, baseURI, encoding)
  $stderr << "entering resolveRelativeURIs\n" if $debug # FIXME write a decent logger
  # [element name, attribute that may hold a URI]
  relative_uris = [
    %w[a href],
    %w[applet codebase],
    %w[area href],
    %w[blockquote cite],
    %w[body background],
    %w[del cite],
    %w[form action],
    %w[frame longdesc],
    %w[frame src],
    %w[iframe longdesc],
    %w[iframe src],
    %w[head profile],
    %w[img longdesc],
    %w[img src],
    %w[img usemap],
    %w[input src],
    %w[input usemap],
    %w[ins cite],
    %w[link href],
    %w[object classid],
    %w[object codebase],
    %w[object data],
    %w[object usemap],
    %w[q cite],
    %w[script src],
  ]
  doc = Hpricot(htmlSource)
  relative_uris.each do |tag_name, attr_name|
    doc.search(tag_name).each do |node|
      target = node.attributes[attr_name]
      next unless target
      next if target.empty?
      next unless URI.parse(target).relative?
      node.attributes[attr_name] = urljoin(baseURI, target)
    end
  end
  doc.to_html
end
3083
+
3084
# Hpricot document subclass whose `scrub` method sanitizes the markup it
# holds and returns the document itself.
#
# What scrub does per node, as visible in the code below:
#   * element listed in Acceptable_Elements: strip_attributes is called on it;
#   * any other element: if it is listed in Unacceptable_Elements_With_End_Tag
#     its inner HTML is emptied first; then the element is replaced by the
#     scrubbed HTML of its own children (a nested SanitizerDoc);
#   * doctype node: removed from its parent's children;
#   * text node: "&#39;" and "&#34;" become literal quote characters and
#     carriage returns are removed;
#   * anything else: left untouched.
# Unless $compatible is set, strip_style is additionally applied to the
# result of the search `self/tag`.
#
# NOTE(review): `tag` and `@config` are not defined anywhere in the visible
# code - confirm where they come from before relying on the non-compatible
# branch.
# NOTE(review): Acceptable_Elements, Unacceptable_Elements_With_End_Tag,
# traverse_all_element, strip_attributes and strip_style are defined outside
# this block; their exact behaviour is assumed from their names.
+ class SanitizerDoc < Hpricot::Doc
3085
+
3086
+ def scrub
3087
+ traverse_all_element do |e|
3088
+ if e.elem?
3089
+ if Acceptable_Elements.include?e.name
3090
+ e.strip_attributes
3091
+ else
3092
+ if Unacceptable_Elements_With_End_Tag.include?e.name
3093
+ e.inner_html = ''
3094
+ end
3095
+ e.swap(SanitizerDoc.new(e.children).scrub.to_html)
3096
+ # This works because the children swapped in are brought in "after" the current element.
3097
+ end
3098
+ elsif e.doctype?
3099
+ e.parent.children.delete(e)
3100
+ elsif e.text?
3101
+ ets = e.to_s
3102
+ ets.gsub!(/&#39;/, "'")
3103
+ ets.gsub!(/&#34;/, '"')
3104
+ ets.gsub!(/\r/,'')
3105
+ e.swap(ets)
3106
+ else
3107
+ end
3108
+ end
3109
+ # yes, that '/' should be there. It's a search method. See the Hpricot docs.
3110
+
3111
+ unless $compatible # FIXME not properly recursive, see comment in recursive_strip
3112
+ (self/tag).strip_style(@config[:allow_css_properties], @config[:allow_css_keywords])
3113
+ end
3114
+ return self
3115
+ end
3116
+ end
3117
+
3118
# Convenience constructor: parses html with Hpricot.make and wraps the
# result in a FeedParser::SanitizerDoc. Exposed as a module function.
def SanitizerDoc(html)
  parsed_nodes = Hpricot.make(html)
  FeedParser::SanitizerDoc.new(parsed_nodes)
end
module_function(:SanitizerDoc)
3122
# Sanitize an HTML fragment and return it as a stripped string.
#
# "<!" sequences that do not start a DOCTYPE (in any letter case), a comment
# or a marked section are escaped to "&lt;!" first; the result is parsed into
# a SanitizerDoc and scrubbed. The `encoding` argument is not used.
def self.sanitizeHTML(html,encoding)
  # FIXME Tidy not yet supported
  # Case-insensitive, like the same substitution in LooseFeedParser#parse,
  # so that "<!doctype ...>" is not turned into visible text.
  escaped = html.gsub(/<!((?!DOCTYPE|--|\[))/i, '&lt;!\1')
  scrubbed = SanitizerDoc(escaped).scrub
  return scrubbed.to_html.strip
end
3129
+
3130
+
3131
+
3132
# Get the character encoding of the XML document.
#
# feed     - object answering `meta` with a header hash (as open-uri
#            results do); anything else is treated as "no HTTP headers".
# xml_data - the raw document.
#
# Returns a five-element array:
#   [true_encoding, http_encoding, xml_encoding, sniffed_xml_encoding,
#    acceptable_content_type]
# where http_encoding comes from the Content-Type charset parameter,
# xml_encoding from the <?xml ... encoding=...?> declaration,
# sniffed_xml_encoding from the first bytes of the data, true_encoding is
# the one chosen from those according to the content type, and
# acceptable_content_type tells whether the content type is an XML type.
def self.getCharacterEncoding(feed, xml_data)
  $stderr << "In getCharacterEncoding\n" if $debug
  sniffed_xml_encoding = nil
  xml_encoding = nil
  true_encoding = nil

  begin
    http_headers = feed.meta
    content_type_header = feed.meta['content-type']
    http_content_type = content_type_header.split(';')[0]
    charset = content_type_header.to_s.scan(/charset\s*=\s*(.*?)(?:"|')*$/).flatten[0].to_s.gsub(/("|')/,'')
    http_encoding = charset.empty? ? nil : charset
    # FIXME Open-Uri returns iso8859-1 if there is no charset header,
    # but that doesn't pass the tests. Open-Uri claims its following
    # the right RFC. Are they wrong or do we need to change the tests?
  rescue NoMethodError
    http_headers = {}
    http_content_type = nil
    http_encoding = nil
  end

  # Must sniff for non-ASCII-compatible character encodings before
  # searching for XML declaration.  This heuristic is defined in
  # section F of the XML specification:
  # http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info
  begin
    lead4 = xml_data[0..3]
    if lead4 == "\x4c\x6f\xa7\x94"
      # EBCDIC
      xml_data = _ebcdic_to_ascii(xml_data)
    elsif lead4 == "\x00\x3c\x00\x3f"
      # UTF-16BE
      sniffed_xml_encoding = 'utf-16be'
      xml_data = uconvert(xml_data, 'utf-16be', 'utf-8')
    elsif xml_data.size >= 4 && xml_data[0..1] == "\xfe\xff" && xml_data[2..3] != "\x00\x00"
      # UTF-16BE with BOM
      sniffed_xml_encoding = 'utf-16be'
      xml_data = uconvert(xml_data[2..-1], 'utf-16be', 'utf-8')
    elsif lead4 == "\x3c\x00\x3f\x00"
      # UTF-16LE
      sniffed_xml_encoding = 'utf-16le'
      xml_data = uconvert(xml_data, 'utf-16le', 'utf-8')
    elsif xml_data.size >= 4 && xml_data[0..1] == "\xff\xfe" && xml_data[2..3] != "\x00\x00"
      # UTF-16LE with BOM
      sniffed_xml_encoding = 'utf-16le'
      xml_data = uconvert(xml_data[2..-1], 'utf-16le', 'utf-8')
    elsif lead4 == "\x00\x00\x00\x3c"
      # UTF-32BE
      sniffed_xml_encoding = 'utf-32be'
      xml_data = uconvert(xml_data, 'utf-32be', 'utf-8')
    elsif lead4 == "\x3c\x00\x00\x00"
      # UTF-32LE
      sniffed_xml_encoding = 'utf-32le'
      xml_data = uconvert(xml_data, 'utf-32le', 'utf-8')
    elsif lead4 == "\x00\x00\xfe\xff"
      # UTF-32BE with BOM
      sniffed_xml_encoding = 'utf-32be'
      xml_data = uconvert(xml_data[4..-1], 'utf-32BE', 'utf-8')
    elsif lead4 == "\xff\xfe\x00\x00"
      # UTF-32LE with BOM
      sniffed_xml_encoding = 'utf-32le'
      xml_data = uconvert(xml_data[4..-1], 'utf-32le', 'utf-8')
    elsif xml_data[0..2] == "\xef\xbb\xbf"
      # UTF-8 with BOM
      sniffed_xml_encoding = 'utf-8'
      xml_data = xml_data[3..-1]
    end
    # anything else is ASCII-compatible and needs no conversion
    xml_encoding_match = /^<\?.*encoding=[\'"](.*?)[\'"].*\?>/.match(xml_data)
  rescue
    xml_encoding_match = nil
  end

  if xml_encoding_match
    xml_encoding = xml_encoding_match[1].downcase
    # Declared names that only say "some multi-byte Unicode form": the
    # sniffed encoding is more specific, so it wins.
    xencodings = ['iso-10646-ucs-2', 'ucs-2', 'csunicode', 'iso-10646-ucs-4', 'ucs-4', 'csucs4', 'utf-16', 'utf-32', 'utf_16', 'utf_32', 'utf16', 'u16']
    xml_encoding = sniffed_xml_encoding if sniffed_xml_encoding && xencodings.include?(xml_encoding)
  end

  acceptable_content_type = false
  application_content_types = ['application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity']
  text_content_types = ['text/xml', 'text/xml-external-parsed-entity']
  is_application = /^application\// =~ http_content_type
  is_text = /^text\// =~ http_content_type
  has_xml_suffix = /\+xml$/ =~ http_content_type

  if application_content_types.include?(http_content_type) || (is_application && has_xml_suffix)
    acceptable_content_type = true
    true_encoding = http_encoding || xml_encoding || 'utf-8'
  elsif text_content_types.include?(http_content_type) || (is_text && has_xml_suffix)
    acceptable_content_type = true
    true_encoding = http_encoding || 'us-ascii'
  elsif is_text
    true_encoding = http_encoding || 'us-ascii'
  elsif http_headers && !http_headers.empty? && !http_headers.has_key?('content-type')
    true_encoding = xml_encoding || 'iso-8859-1'
  else
    true_encoding = xml_encoding || 'utf-8'
  end

  [true_encoding, http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type]
end
3230
+
3231
# Changes an XML data stream on the fly to specify a new encoding.
#
# data     - a raw sequence of bytes (not Unicode) that is presumed to be in
#            `encoding` already.
# encoding - name of that encoding. When the data starts with a UTF-16,
#            UTF-32 or UTF-8 byte order mark, the mark is removed and the
#            encoding it implies is used instead.
#
# Returns the data converted to UTF-8, with its XML declaration replaced by
# (or, if it had none, prefixed with) one that declares utf-8.
#
# Any error raised by the conversion is passed on to the caller, which is
# how callers learn that `encoding` was the wrong guess.
def self.toUTF8(data, encoding)
  $stderr << "entering self.toUTF8, trying encoding %s\n" % encoding if $debug
  # NOTE we must use double quotes when dealing with \x encodings!
  if (data.size >= 4 and data[0..1] == "\xfe\xff" and data[2..3] != "\x00\x00")
    if $debug
      $stderr << "stripping BOM\n"
      $stderr << "trying utf-16be instead\n" if encoding != 'utf-16be'
    end
    encoding = 'utf-16be'
    data = data[2..-1]
  elsif (data.size >= 4 and data[0..1] == "\xff\xfe" and data[2..3] != "\x00\x00")
    if $debug
      $stderr << "stripping BOM\n"
      $stderr << "trying utf-16le instead\n" if encoding != 'utf-16le'
    end
    encoding = 'utf-16le'
    data = data[2..-1]
  elsif (data[0..2] == "\xef\xbb\xbf")
    if $debug
      $stderr << "stripping BOM\n"
      $stderr << "trying utf-8 instead\n" if encoding != 'utf-8'
    end
    encoding = 'utf-8'
    data = data[3..-1]
  elsif (data[0..3] == "\x00\x00\xfe\xff")
    if $debug
      $stderr << "stripping BOM\n"
      $stderr << "trying utf-32be instead\n" if encoding != 'utf-32be'
    end
    encoding = 'utf-32be'
    data = data[4..-1]
  elsif (data[0..3] == "\xff\xfe\x00\x00")
    if $debug
      $stderr << "stripping BOM\n"
      $stderr << "trying utf-32le instead\n" if encoding != 'utf-32le'
    end
    encoding = 'utf-32le'
    data = data[4..-1]
  end

  # Deliberately not rescued: a failed conversion must surface here instead
  # of being logged as a success and failing later on a nil result.
  newdata = uconvert(data, encoding, 'utf-8')
  $stderr << "successfully converted #{encoding} data to utf-8\n" if $debug

  declmatch = /^<\?xml[^>]*?>/
  newdecl = "<?xml version='1.0' encoding='utf-8'?>"
  if declmatch =~ newdata
    newdata = newdata.sub(declmatch, newdecl)
  else
    newdata = newdecl + "\n" + newdata
  end
  return newdata
end
3296
+
3297
# Strips DOCTYPE from XML document, returns [rss_version, stripped_data].
#
# rss_version is 'rss091n' when the (first) DOCTYPE mentions "netscape" in
# any letter case, otherwise nil.
# stripped_data is the same XML document with every <!ENTITY ...>
# declaration and the first <!DOCTYPE ...> removed.
def self.stripDoctype(data)
  without_entities = data.gsub(/<!ENTITY(.*?)>/m, '') # m is for Regexp::MULTILINE

  doctype_pattern = /<!DOCTYPE(.*?)>/m
  doctype = without_entities[doctype_pattern, 1] || ''
  version = doctype.downcase.include?('netscape') ? 'rss091n' : nil

  [version, without_entities.sub(doctype_pattern, '')]
end
3323
+
3324
# Instance-level shortcut: forwards all arguments to FeedParser.parse and
# returns its result.
def parse(*args)
  FeedParser.parse(*args)
end
3325
# Parse a feed from a URL or file name.
#
# furi    - location of the feed; it is passed to `open`.
# options - Hash of optional settings:
#   :compatible       - overrides the global $compatible switch
#   :etag             - sent as If-None-Match
#   :modified         - date string, sent as If-Modified-Since (this entry
#                       is rewritten in RFC 2822 form in the given hash)
#   :agent            - User-Agent (USER_AGENT is the default)
#   :referrer         - Referer
#   :content_location, :content_language, :content_type
#                     - sent as headers and used as fallbacks for the
#                       corresponding response headers
#
# Returns a FeedParserDict with (at least) 'feed', 'entries' and 'bozo';
# when a problem was found 'bozo' is true and 'bozo_exception' describes it.
# After a successful parse 'encoding', 'version' and 'namespaces' are set
# as well.
#
# NOTE(review): `open` on a caller-supplied string can do more than read a
# file or URL (Kernel#open treats a leading "|" as a command); callers must
# not pass untrusted strings - confirm how furi is sourced.
def FeedParser.parse(furi, options={})
  $compatible = options[:compatible] || $compatible # Use the default compatibility if compatible is nil
  result = FeedParserDict.new
  result['feed'] = FeedParserDict.new
  result['entries'] = []
  if options[:modified]
    options[:modified] = Time.parse(options[:modified]).rfc2822
    # FIXME this ignores all of our time parsing work.  Does it matter?
  end
  result['bozo'] = false

  # --- fetch -------------------------------------------------------------
  begin
    if URI::parse(furi).class == URI::Generic
      f = open(furi) # OpenURI doesn't behave well when passing HTTP options to a file.
    else
      # And when you do pass them, make sure they aren't just nil (this still true?)
      newd = {}
      newd["If-None-Match"] = options[:etag] unless options[:etag].nil?
      newd["If-Modified-Since"] = options[:modified] unless options[:modified].nil?
      newd["User-Agent"] = (options[:agent] || USER_AGENT).to_s
      newd["Referer"] = options[:referrer] unless options[:referrer].nil?
      newd["Content-Location"] = options[:content_location] unless options[:content_location].nil?
      newd["Content-Language"] = options[:content_language] unless options[:content_language].nil?
      newd["Content-type"] = options[:content_type] unless options[:content_type].nil?

      f = open(furi, newd)
    end

    data = f.read
    f.close
  rescue => download_error
    $stderr << "Rescued in parse: "+download_error.to_s+"\n" if $debug # My addition
    result['bozo'] = true
    result['bozo_exception'] = download_error
    data = ''
    f = nil
  end

  # --- response metadata -------------------------------------------------
  # f is nil after a failed download and a plain File has no `meta`; both
  # cases end up in the NoMethodError branch, which builds the headers from
  # the options alone.
  begin
    if f.meta
      result['etag'] = options[:etag] || f.meta['etag']
      result['modified'] = options[:modified] || f.last_modified
      result['url'] = f.base_uri.to_s
      result['status'] = f.status[0] || 200
      result['headers'] = f.meta
      result['headers']['content-location'] ||= options[:content_location] unless options[:content_location].nil?
      result['headers']['content-language'] ||= options[:content_language] unless options[:content_language].nil?
      result['headers']['content-type'] ||= options[:content_type] unless options[:content_type].nil?
    end
  rescue NoMethodError
    result['headers'] = {}
    result['etag'] = result['headers']['etag'] = options[:etag] unless options[:etag].nil?
    result['modified'] = result['headers']['last-modified'] = options[:modified] unless options[:modified].nil?
    unless options[:content_location].nil?
      result['headers']['content-location'] = options[:content_location]
    end
    unless options[:content_language].nil?
      result['headers']['content-language'] = options[:content_language]
    end
    unless options[:content_type].nil?
      result['headers']['content-type'] = options[:content_type]
    end
  end


  # there are four encodings to keep track of:
  # - http_encoding is the encoding declared in the Content-Type HTTP header
  # - xml_encoding is the encoding declared in the <?xml declaration
  # - sniffed_encoding is the encoding sniffed from the first 4 bytes of the XML data
  # - result['encoding'] is the actual encoding, as per RFC 3023 and a variety of other conflicting specifications
  http_headers = result['headers']
  result['encoding'], http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type =
    self.getCharacterEncoding(f,data)

  if not http_headers.empty? and not acceptable_content_type
    if http_headers.has_key?('content-type')
      bozo_message = "#{http_headers['content-type']} is not an XML media type"
    else
      bozo_message = 'no Content-type specified'
    end
    result['bozo'] = true
    result['bozo_exception'] = NonXMLContentType.new(bozo_message) # I get to care about this, cuz Mark says I should.
  end
  result['version'], data = self.stripDoctype(data)
  baseuri = http_headers['content-location'] || result['href']
  baselang = http_headers['content-language']

  # if server sent 304, we're done
  # (compared as text: the status may be stored as "304" or as 304)
  if result['status'].to_s == '304'
    result['version'] = ''
    result['debug_message'] = "The feed has not changed since you last checked, " +
    "so the server sent no data. This is a feature, not a bug!"
    return result
  end

  # if there was a problem downloading, we're done
  if data.nil? or data.empty?
    return result
  end

  # --- determine character encoding ---------------------------------------
  use_strict_parser = false
  known_encoding = false
  tried_encodings = []
  proposed_encoding = nil
  # try: HTTP encoding, declared XML encoding, encoding sniffed from BOM
  # The block parameter has its own name so that the method-level
  # proposed_encoding is updated no matter how the Ruby version scopes
  # block parameters.
  [result['encoding'], xml_encoding, sniffed_xml_encoding].each do |candidate|
    next if candidate.nil? or candidate.empty?
    next if tried_encodings.include? candidate
    tried_encodings << candidate
    proposed_encoding = candidate
    begin
      data = self.toUTF8(data, proposed_encoding)
      known_encoding = use_strict_parser = true
      break
    rescue
    end
  end
  # if no luck and we have auto-detection library, try that
  if not known_encoding and $chardet
    begin
      proposed_encoding = CharDet.detect(data)['encoding']
      if proposed_encoding and not tried_encodings.include?proposed_encoding
        tried_encodings << proposed_encoding
        data = self.toUTF8(data, proposed_encoding)
        known_encoding = use_strict_parser = true
      end
    rescue
    end
  end

  # if still no luck and we haven't tried utf-8 yet, try that
  if not known_encoding and not tried_encodings.include?'utf-8'
    begin
      proposed_encoding = 'utf-8'
      tried_encodings << proposed_encoding
      data = self.toUTF8(data, proposed_encoding)
      known_encoding = use_strict_parser = true
    rescue
    end
  end
  # if still no luck and we haven't tried windows-1252 yet, try that
  if not known_encoding and not tried_encodings.include?'windows-1252'
    begin
      proposed_encoding = 'windows-1252'
      tried_encodings << proposed_encoding
      data = self.toUTF8(data, proposed_encoding)
      known_encoding = use_strict_parser = true
    rescue
    end
  end

  # NOTE this isn't in FeedParser.py 4.1
  # if still no luck and we haven't tried iso-8859-2 yet, try that.
  #if not known_encoding and not tried_encodings.include?'iso-8859-2'
  #  begin
  #    proposed_encoding = 'iso-8859-2'
  #    tried_encodings << proposed_encoding
  #    data = self.toUTF8(data, proposed_encoding)
  #    known_encoding = use_strict_parser = true
  #  rescue
  #  end
  #end


  # if still no luck, give up
  if not known_encoding
    result['bozo'] = true
    result['bozo_exception'] = CharacterEncodingUnknown.new("document encoding unknown, I tried #{result['encoding']}, #{xml_encoding}, utf-8 and windows-1252 but nothing worked")
    result['encoding'] = ''
  elsif proposed_encoding != result['encoding']
    result['bozo'] = true
    result['bozo_exception'] = CharacterEncodingOverride.new("documented declared as #{result['encoding']}, but parsed as #{proposed_encoding}")
    result['encoding'] = proposed_encoding
  end

  # --- parse ---------------------------------------------------------------
  if use_strict_parser
    # initialize the SAX parser
    saxparser = XML::SAX::Helpers::ParserFactory.makeParser("XML::Parser::SAXDriver")
    feedparser = StrictFeedParser.new(baseuri, baselang, 'utf-8')
    saxparser.setDocumentHandler(feedparser)
    saxparser.setDTDHandler(feedparser)
    saxparser.setEntityResolver(feedparser)
    saxparser.setErrorHandler(feedparser)

    inputdata = XML::SAX::InputSource.new('parsedfeed')
    inputdata.setByteStream(StringIO.new(data))
    begin
      saxparser.parse(inputdata)
    # NOTE(review): kept as broad as before because the class hierarchy of
    # the XML parser's errors is not visible here - narrow to StandardError
    # once confirmed.
    rescue Exception => parseerr # resparse
      if $debug
        $stderr << "xml parsing failed\n"
        $stderr << parseerr.to_s+"\n" # Hrmph.
      end
      result['bozo'] = true
      # Prefer the error the handler recorded; otherwise report the
      # exception that aborted this parse.
      result['bozo_exception'] = feedparser.exc || parseerr
      use_strict_parser = false
    end
  end
  if not use_strict_parser
    feedparser = LooseFeedParser.new(baseuri, baselang, (known_encoding and 'utf-8' or ''))
    feedparser.parse(data)
    $stderr << "Using LooseFeed\n\n" if $debug
  end
  result['feed'] = feedparser.feeddata
  result['entries'] = feedparser.entries
  result['version'] = result['version'] || feedparser.version
  result['namespaces'] = feedparser.namespacesInUse
  return result
end
3541
+ end # End FeedParser module
3542
+
3543
# Common base of the result serializers: it only stores the parse results
# in @results so that subclasses can render them.
class Serializer
  # results - the value to be rendered by a subclass's `write`.
  def initialize(results)
    @results = results
  end
end
3548
+
3549
# Writes the results as flat "dotted.path[index].key=value" lines.
class TextSerializer < Serializer
  # Writes all results to stream (standard output by default).
  def write(stream=$stdout)
    writer(stream, @results, '')
  end

  # Recursively writes node to stream.
  #
  # prefix is the dotted path collected so far, ending in "." (or empty at
  # the top level). nil, false and empty values produce no output. For
  # hash-like nodes the keys 'description' and 'link' are skipped, as is any
  # key that has a "<key>_detail" or "<key>_parsed" companion.
  def writer(stream, node, prefix)
    return unless node
    return if node.respond_to?(:empty?) and node.empty?
    if node.respond_to?(:keys)
      node.keys.sort.each do |key|
        next if ['description','link'].include? key
        next if node.has_key? key+'_detail'
        next if node.has_key? key+'_parsed'
        writer(stream, node[key], prefix+key+'.')
      end
    elsif node.class == Array
      node.each_with_index do |thing, index|
        writer(stream, thing, prefix[0..-2] + '[' + index.to_s + '].')
      end
    else
      begin
        # Build the whole line first so that a value that cannot be
        # converted leaves no partial line behind; such values are skipped
        # on purpose (best effort output).
        line = prefix[0..-2] + '=' + u(node.to_s) + "\n"
        stream << line
      rescue
      end
    end
  end
end
3579
+
3580
# PP.pp (used below) lives in the 'pp' library; requiring it here is idempotent.
require 'pp'

# Pretty-prints the whole result structure, preceded by its 'href' value.
class PprintSerializer < Serializer
  # Writes @results['href'], a blank line, the pretty-printed results and a
  # trailing newline to +stream+ (any object responding to <<).
  def write(stream = $stdout)
    stream << @results['href'].to_s + "\n\n"
    # Kernel#pp always prints to $stdout; PP.pp takes the output object, so
    # the body now goes to the same stream as the header and footer.
    PP.pp(@results, stream)
    stream << "\n"
  end
end
3587
+
3588
+
3589
require 'optparse'
require 'ostruct'

# Command-line front end.
#
# Parses the switches below, then hands every remaining argument (a URL) to
# FeedParser.parse and prints the result with the selected serializer.
# FeedParser, TextSerializer and PprintSerializer are defined elsewhere in
# this file.
module FeedParserCLI
  module_function

  # Returns an OpenStruct holding the default value of every option.
  def default_options
    options = OpenStruct.new
    options.etag = options.modified = options.agent = options.referrer = nil
    options.content_language = options.content_location = options.ctype = nil
    # A Symbol, like the values the --format switch produces.
    options.format = :pprint
    options.compatible = $compatible
    options.verbose = false
    options
  end

  # Builds the OptionParser whose handlers store parsed values in +options+.
  def option_parser(options)
    OptionParser.new do |parser|
      parser.banner = "Usage: #{File.basename($0)} [options] URL..."
      parser.separator ""

      parser.on("-A", "--user-agent [AGENT]",
                "User-Agent for HTTP URLs") { |agent| options.agent = agent }

      parser.on("-e", "--referrer [URL]",
                "Referrer for HTTP URLs") { |referrer| options.referrer = referrer }

      # "-t" used to be registered for both --etag and --content-type.
      # OptionParser keeps the last registration, so -t has always meant
      # --content-type in practice. It keeps that meaning; --etag gets a
      # short form of its own.
      parser.on("-E", "--etag [TAG]",
                "ETag/If-None-Match for HTTP URLs") { |etag| options.etag = etag }

      parser.on("-m", "--last-modified [DATE]",
                "Last-modified/If-Modified-Since for HTTP URLs (any supported date format)") { |modified|
        options.modified = modified
      }

      parser.on("-f", "--format [FORMAT]", [:text, :pprint],
                "output results in FORMAT (text, pprint)") { |format| options.format = format }

      parser.on("-v", "--[no-]verbose",
                "write debugging information to stderr") { |verbose| options.verbose = verbose }

      parser.on("-c", "--[no-]compatible",
                "strip element attributes like feedparser.py 4.1 (default)") { |compatible|
        options.compatible = compatible
      }

      parser.on("-l", "--content-location [LOCATION]",
                "default Content-Location HTTP header") { |location| options.content_location = location }

      parser.on("-a", "--content-language [LANG]",
                "default Content-Language HTTP header") { |language| options.content_language = language }

      parser.on("-t", "--content-type [TYPE]",
                "default Content-type HTTP header") { |ctype| options.ctype = ctype }
    end
  end

  # Parses +argv+ in place: recognised switches are removed, so only the URLs
  # remain in +argv+ afterwards. Returns the populated options OpenStruct.
  #
  # Raises OptionParser::ParseError (a subclass) on an unknown switch.
  def parse_options(argv)
    options = default_options
    option_parser(options).parse!(argv)
    options
  end

  # Runs the command-line tool.
  #
  # argv - argument list; modified in place by parse_options.
  # out  - stream the serialized results are written to.
  #
  # Sets the file-wide $debug and $compatible globals from the switches.
  def run(argv = ARGV, out = $stdout)
    options = parse_options(argv)
    $debug = true if options.verbose
    $compatible = options.compatible unless options.compatible.nil?
    return if argv.empty?

    serializer = options.format == :text ? TextSerializer : PprintSerializer
    argv.each do |url|
      results = FeedParser.parse(url, :etag => options.etag,
                                      :modified => options.modified,
                                      :agent => options.agent,
                                      :referrer => options.referrer,
                                      :content_location => options.content_location,
                                      :content_language => options.content_language,
                                      :content_type => options.ctype)
      serializer.new(results).write(out)
    end
  end
end

# Only act as a command-line tool when executed directly. Without this guard,
# a plain `require` of the library would parse (and mutate) the host program's
# ARGV and try to fetch every leftover argument as a feed.
FeedParserCLI.run(ARGV) if __FILE__ == $0