rfeedparser 0.9.7
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +68 -0
- data/README +28 -0
- data/RUBY-TESTING +60 -0
- data/lib/feedparser.rb +3671 -0
- data/tests/feedparserserver.rb +115 -0
- data/tests/feedparsertest.rb +196 -0
- data/tests/illformed/amp/amp01.xml +9 -0
- data/tests/illformed/amp/amp02.xml +9 -0
- data/tests/illformed/amp/amp03.xml +9 -0
- data/tests/illformed/amp/amp04.xml +9 -0
- data/tests/illformed/amp/amp05.xml +9 -0
- data/tests/illformed/amp/amp06.xml +9 -0
- data/tests/illformed/amp/amp07.xml +9 -0
- data/tests/illformed/amp/amp08.xml +9 -0
- data/tests/illformed/amp/amp09.xml +9 -0
- data/tests/illformed/amp/amp10.xml +9 -0
- data/tests/illformed/amp/amp11.xml +9 -0
- data/tests/illformed/amp/amp12.xml +9 -0
- data/tests/illformed/amp/amp13.xml +9 -0
- data/tests/illformed/amp/amp14.xml +9 -0
- data/tests/illformed/amp/amp15.xml +9 -0
- data/tests/illformed/amp/amp16.xml +9 -0
- data/tests/illformed/amp/amp17.xml +9 -0
- data/tests/illformed/amp/amp18.xml +9 -0
- data/tests/illformed/amp/amp19.xml +9 -0
- data/tests/illformed/amp/amp20.xml +9 -0
- data/tests/illformed/amp/amp21.xml +9 -0
- data/tests/illformed/amp/amp22.xml +9 -0
- data/tests/illformed/amp/amp23.xml +9 -0
- data/tests/illformed/amp/amp24.xml +9 -0
- data/tests/illformed/amp/amp25.xml +9 -0
- data/tests/illformed/amp/amp26.xml +9 -0
- data/tests/illformed/amp/amp27.xml +9 -0
- data/tests/illformed/amp/amp28.xml +9 -0
- data/tests/illformed/amp/amp29.xml +9 -0
- data/tests/illformed/amp/amp30.xml +9 -0
- data/tests/illformed/amp/amp31.xml +9 -0
- data/tests/illformed/amp/amp32.xml +9 -0
- data/tests/illformed/amp/amp33.xml +9 -0
- data/tests/illformed/amp/amp34.xml +9 -0
- data/tests/illformed/amp/amp35.xml +9 -0
- data/tests/illformed/amp/amp36.xml +9 -0
- data/tests/illformed/amp/amp37.xml +9 -0
- data/tests/illformed/amp/amp38.xml +9 -0
- data/tests/illformed/amp/amp39.xml +9 -0
- data/tests/illformed/amp/amp40.xml +9 -0
- data/tests/illformed/amp/amp41.xml +9 -0
- data/tests/illformed/amp/amp42.xml +9 -0
- data/tests/illformed/amp/amp43.xml +9 -0
- data/tests/illformed/amp/amp44.xml +9 -0
- data/tests/illformed/amp/amp45.xml +9 -0
- data/tests/illformed/amp/amp46.xml +9 -0
- data/tests/illformed/amp/amp47.xml +9 -0
- data/tests/illformed/amp/amp48.xml +9 -0
- data/tests/illformed/amp/amp49.xml +9 -0
- data/tests/illformed/amp/amp50.xml +9 -0
- data/tests/illformed/amp/amp51.xml +9 -0
- data/tests/illformed/amp/amp52.xml +9 -0
- data/tests/illformed/amp/amp53.xml +9 -0
- data/tests/illformed/amp/amp54.xml +9 -0
- data/tests/illformed/amp/amp55.xml +9 -0
- data/tests/illformed/amp/amp56.xml +9 -0
- data/tests/illformed/amp/amp57.xml +9 -0
- data/tests/illformed/amp/amp58.xml +9 -0
- data/tests/illformed/amp/amp59.xml +9 -0
- data/tests/illformed/amp/amp60.xml +9 -0
- data/tests/illformed/amp/amp61.xml +9 -0
- data/tests/illformed/amp/amp62.xml +9 -0
- data/tests/illformed/amp/amp63.xml +9 -0
- data/tests/illformed/amp/amp64.xml +9 -0
- data/tests/illformed/atom/atom_namespace_1.xml +7 -0
- data/tests/illformed/atom/atom_namespace_2.xml +7 -0
- data/tests/illformed/atom/atom_namespace_3.xml +7 -0
- data/tests/illformed/atom/atom_namespace_4.xml +7 -0
- data/tests/illformed/atom/atom_namespace_5.xml +7 -0
- data/tests/illformed/atom/entry_author_email.xml +13 -0
- data/tests/illformed/atom/entry_author_homepage.xml +13 -0
- data/tests/illformed/atom/entry_author_map_author.xml +13 -0
- data/tests/illformed/atom/entry_author_map_author_2.xml +12 -0
- data/tests/illformed/atom/entry_author_name.xml +13 -0
- data/tests/illformed/atom/entry_author_uri.xml +13 -0
- data/tests/illformed/atom/entry_author_url.xml +13 -0
- data/tests/illformed/atom/entry_content_mode_base64.xml +11 -0
- data/tests/illformed/atom/entry_content_mode_escaped.xml +9 -0
- data/tests/illformed/atom/entry_content_type.xml +9 -0
- data/tests/illformed/atom/entry_content_type_text_plain.xml +9 -0
- data/tests/illformed/atom/entry_content_value.xml +9 -0
- data/tests/illformed/atom/entry_contributor_email.xml +13 -0
- data/tests/illformed/atom/entry_contributor_homepage.xml +13 -0
- data/tests/illformed/atom/entry_contributor_multiple.xml +18 -0
- data/tests/illformed/atom/entry_contributor_name.xml +13 -0
- data/tests/illformed/atom/entry_contributor_uri.xml +13 -0
- data/tests/illformed/atom/entry_contributor_url.xml +13 -0
- data/tests/illformed/atom/entry_id.xml +9 -0
- data/tests/illformed/atom/entry_id_map_guid.xml +9 -0
- data/tests/illformed/atom/entry_link_alternate_map_link.xml +9 -0
- data/tests/illformed/atom/entry_link_alternate_map_link_2.xml +9 -0
- data/tests/illformed/atom/entry_link_href.xml +9 -0
- data/tests/illformed/atom/entry_link_multiple.xml +10 -0
- data/tests/illformed/atom/entry_link_rel.xml +9 -0
- data/tests/illformed/atom/entry_link_title.xml +9 -0
- data/tests/illformed/atom/entry_link_type.xml +9 -0
- data/tests/illformed/atom/entry_summary.xml +9 -0
- data/tests/illformed/atom/entry_summary_base64.xml +11 -0
- data/tests/illformed/atom/entry_summary_base64_2.xml +11 -0
- data/tests/illformed/atom/entry_summary_content_mode_base64.xml +11 -0
- data/tests/illformed/atom/entry_summary_content_mode_escaped.xml +9 -0
- data/tests/illformed/atom/entry_summary_content_type.xml +9 -0
- data/tests/illformed/atom/entry_summary_content_type_text_plain.xml +9 -0
- data/tests/illformed/atom/entry_summary_content_value.xml +9 -0
- data/tests/illformed/atom/entry_summary_escaped_markup.xml +9 -0
- data/tests/illformed/atom/entry_summary_inline_markup.xml +9 -0
- data/tests/illformed/atom/entry_summary_inline_markup_2.xml +9 -0
- data/tests/illformed/atom/entry_summary_naked_markup.xml +9 -0
- data/tests/illformed/atom/entry_summary_text_plain.xml +9 -0
- data/tests/illformed/atom/entry_title.xml +9 -0
- data/tests/illformed/atom/entry_title_base64.xml +11 -0
- data/tests/illformed/atom/entry_title_base64_2.xml +11 -0
- data/tests/illformed/atom/entry_title_content_mode_base64.xml +11 -0
- data/tests/illformed/atom/entry_title_content_mode_escaped.xml +9 -0
- data/tests/illformed/atom/entry_title_content_type.xml +9 -0
- data/tests/illformed/atom/entry_title_content_type_text_plain.xml +9 -0
- data/tests/illformed/atom/entry_title_content_value.xml +9 -0
- data/tests/illformed/atom/entry_title_escaped_markup.xml +9 -0
- data/tests/illformed/atom/entry_title_inline_markup.xml +9 -0
- data/tests/illformed/atom/entry_title_inline_markup_2.xml +9 -0
- data/tests/illformed/atom/entry_title_naked_markup.xml +9 -0
- data/tests/illformed/atom/entry_title_text_plain.xml +9 -0
- data/tests/illformed/atom/entry_title_text_plain_brackets.xml +9 -0
- data/tests/illformed/atom/feed_author_email.xml +11 -0
- data/tests/illformed/atom/feed_author_homepage.xml +11 -0
- data/tests/illformed/atom/feed_author_map_author.xml +11 -0
- data/tests/illformed/atom/feed_author_map_author_2.xml +10 -0
- data/tests/illformed/atom/feed_author_name.xml +11 -0
- data/tests/illformed/atom/feed_author_uri.xml +11 -0
- data/tests/illformed/atom/feed_author_url.xml +11 -0
- data/tests/illformed/atom/feed_contributor_email.xml +11 -0
- data/tests/illformed/atom/feed_contributor_homepage.xml +11 -0
- data/tests/illformed/atom/feed_contributor_multiple.xml +16 -0
- data/tests/illformed/atom/feed_contributor_name.xml +11 -0
- data/tests/illformed/atom/feed_contributor_uri.xml +11 -0
- data/tests/illformed/atom/feed_contributor_url.xml +11 -0
- data/tests/illformed/atom/feed_copyright.xml +7 -0
- data/tests/illformed/atom/feed_copyright_base64.xml +9 -0
- data/tests/illformed/atom/feed_copyright_base64_2.xml +9 -0
- data/tests/illformed/atom/feed_copyright_content_mode_base64.xml +9 -0
- data/tests/illformed/atom/feed_copyright_content_mode_escaped.xml +7 -0
- data/tests/illformed/atom/feed_copyright_content_type.xml +7 -0
- data/tests/illformed/atom/feed_copyright_content_type_text_plain.xml +7 -0
- data/tests/illformed/atom/feed_copyright_content_value.xml +7 -0
- data/tests/illformed/atom/feed_copyright_escaped_markup.xml +7 -0
- data/tests/illformed/atom/feed_copyright_inline_markup.xml +7 -0
- data/tests/illformed/atom/feed_copyright_inline_markup_2.xml +7 -0
- data/tests/illformed/atom/feed_copyright_naked_markup.xml +7 -0
- data/tests/illformed/atom/feed_copyright_text_plain.xml +7 -0
- data/tests/illformed/atom/feed_generator.xml +7 -0
- data/tests/illformed/atom/feed_generator_name.xml +7 -0
- data/tests/illformed/atom/feed_generator_url.xml +7 -0
- data/tests/illformed/atom/feed_generator_version.xml +7 -0
- data/tests/illformed/atom/feed_id.xml +7 -0
- data/tests/illformed/atom/feed_id_map_guid.xml +7 -0
- data/tests/illformed/atom/feed_info.xml +7 -0
- data/tests/illformed/atom/feed_info_base64.xml +9 -0
- data/tests/illformed/atom/feed_info_base64_2.xml +9 -0
- data/tests/illformed/atom/feed_info_content_mode_base64.xml +9 -0
- data/tests/illformed/atom/feed_info_content_mode_escaped.xml +7 -0
- data/tests/illformed/atom/feed_info_content_type.xml +7 -0
- data/tests/illformed/atom/feed_info_content_type_text_plain.xml +7 -0
- data/tests/illformed/atom/feed_info_content_value.xml +7 -0
- data/tests/illformed/atom/feed_info_escaped_markup.xml +7 -0
- data/tests/illformed/atom/feed_info_inline_markup.xml +7 -0
- data/tests/illformed/atom/feed_info_inline_markup_2.xml +7 -0
- data/tests/illformed/atom/feed_info_naked_markup.xml +7 -0
- data/tests/illformed/atom/feed_info_text_plain.xml +7 -0
- data/tests/illformed/atom/feed_link_alternate_map_link.xml +7 -0
- data/tests/illformed/atom/feed_link_alternate_map_link_2.xml +7 -0
- data/tests/illformed/atom/feed_link_href.xml +7 -0
- data/tests/illformed/atom/feed_link_multiple.xml +8 -0
- data/tests/illformed/atom/feed_link_rel.xml +7 -0
- data/tests/illformed/atom/feed_link_title.xml +7 -0
- data/tests/illformed/atom/feed_link_type.xml +7 -0
- data/tests/illformed/atom/feed_tagline.xml +7 -0
- data/tests/illformed/atom/feed_tagline_base64.xml +9 -0
- data/tests/illformed/atom/feed_tagline_base64_2.xml +9 -0
- data/tests/illformed/atom/feed_tagline_content_mode_base64.xml +9 -0
- data/tests/illformed/atom/feed_tagline_content_mode_escaped.xml +7 -0
- data/tests/illformed/atom/feed_tagline_content_type.xml +7 -0
- data/tests/illformed/atom/feed_tagline_content_type_text_plain.xml +7 -0
- data/tests/illformed/atom/feed_tagline_content_value.xml +7 -0
- data/tests/illformed/atom/feed_tagline_escaped_markup.xml +7 -0
- data/tests/illformed/atom/feed_tagline_inline_markup.xml +7 -0
- data/tests/illformed/atom/feed_tagline_inline_markup_2.xml +7 -0
- data/tests/illformed/atom/feed_tagline_naked_markup.xml +7 -0
- data/tests/illformed/atom/feed_tagline_text_plain.xml +7 -0
- data/tests/illformed/atom/feed_title.xml +7 -0
- data/tests/illformed/atom/feed_title_base64.xml +9 -0
- data/tests/illformed/atom/feed_title_base64_2.xml +9 -0
- data/tests/illformed/atom/feed_title_content_mode_base64.xml +9 -0
- data/tests/illformed/atom/feed_title_content_mode_escaped.xml +7 -0
- data/tests/illformed/atom/feed_title_content_type.xml +7 -0
- data/tests/illformed/atom/feed_title_content_type_text_plain.xml +7 -0
- data/tests/illformed/atom/feed_title_content_value.xml +7 -0
- data/tests/illformed/atom/feed_title_escaped_markup.xml +7 -0
- data/tests/illformed/atom/feed_title_inline_markup.xml +7 -0
- data/tests/illformed/atom/feed_title_inline_markup_2.xml +7 -0
- data/tests/illformed/atom/feed_title_naked_markup.xml +7 -0
- data/tests/illformed/atom/feed_title_text_plain.xml +7 -0
- data/tests/illformed/atom/relative_uri.xml +7 -0
- data/tests/illformed/atom/relative_uri_inherit.xml +7 -0
- data/tests/illformed/atom/relative_uri_inherit_2.xml +7 -0
- data/tests/illformed/atom10/atom10_namespace.xml +7 -0
- data/tests/illformed/atom10/atom10_version.xml +6 -0
- data/tests/illformed/atom10/entry_author_email.xml +13 -0
- data/tests/illformed/atom10/entry_author_map_author.xml +13 -0
- data/tests/illformed/atom10/entry_author_map_author_2.xml +12 -0
- data/tests/illformed/atom10/entry_author_name.xml +13 -0
- data/tests/illformed/atom10/entry_author_uri.xml +13 -0
- data/tests/illformed/atom10/entry_author_url.xml +13 -0
- data/tests/illformed/atom10/entry_category_label.xml +9 -0
- data/tests/illformed/atom10/entry_category_scheme.xml +9 -0
- data/tests/illformed/atom10/entry_category_term.xml +9 -0
- data/tests/illformed/atom10/entry_content_application_xml.xml +9 -0
- data/tests/illformed/atom10/entry_content_base64.xml +11 -0
- data/tests/illformed/atom10/entry_content_base64_2.xml +11 -0
- data/tests/illformed/atom10/entry_content_escaped_markup.xml +9 -0
- data/tests/illformed/atom10/entry_content_inline_markup.xml +9 -0
- data/tests/illformed/atom10/entry_content_inline_markup_2.xml +9 -0
- data/tests/illformed/atom10/entry_content_src.xml +9 -0
- data/tests/illformed/atom10/entry_content_text_plain.xml +9 -0
- data/tests/illformed/atom10/entry_content_text_plain_brackets.xml +9 -0
- data/tests/illformed/atom10/entry_content_type.xml +9 -0
- data/tests/illformed/atom10/entry_content_type_text.xml +9 -0
- data/tests/illformed/atom10/entry_content_value.xml +9 -0
- data/tests/illformed/atom10/entry_contributor_email.xml +13 -0
- data/tests/illformed/atom10/entry_contributor_multiple.xml +18 -0
- data/tests/illformed/atom10/entry_contributor_name.xml +13 -0
- data/tests/illformed/atom10/entry_contributor_uri.xml +13 -0
- data/tests/illformed/atom10/entry_contributor_url.xml +13 -0
- data/tests/illformed/atom10/entry_id.xml +9 -0
- data/tests/illformed/atom10/entry_id_map_guid.xml +9 -0
- data/tests/illformed/atom10/entry_id_no_normalization_1.xml +9 -0
- data/tests/illformed/atom10/entry_id_no_normalization_2.xml +9 -0
- data/tests/illformed/atom10/entry_id_no_normalization_3.xml +9 -0
- data/tests/illformed/atom10/entry_id_no_normalization_4.xml +9 -0
- data/tests/illformed/atom10/entry_id_no_normalization_5.xml +9 -0
- data/tests/illformed/atom10/entry_id_no_normalization_6.xml +9 -0
- data/tests/illformed/atom10/entry_id_no_normalization_7.xml +9 -0
- data/tests/illformed/atom10/entry_link_alternate_map_link.xml +9 -0
- data/tests/illformed/atom10/entry_link_alternate_map_link_2.xml +9 -0
- data/tests/illformed/atom10/entry_link_alternate_map_link_3.xml +11 -0
- data/tests/illformed/atom10/entry_link_href.xml +9 -0
- data/tests/illformed/atom10/entry_link_hreflang.xml +9 -0
- data/tests/illformed/atom10/entry_link_length.xml +9 -0
- data/tests/illformed/atom10/entry_link_multiple.xml +10 -0
- data/tests/illformed/atom10/entry_link_no_rel.xml +9 -0
- data/tests/illformed/atom10/entry_link_rel.xml +9 -0
- data/tests/illformed/atom10/entry_link_rel_enclosure.xml +9 -0
- data/tests/illformed/atom10/entry_link_rel_enclosure_map_enclosure_length.xml +9 -0
- data/tests/illformed/atom10/entry_link_rel_enclosure_map_enclosure_type.xml +9 -0
- data/tests/illformed/atom10/entry_link_rel_enclosure_map_enclosure_url.xml +9 -0
- data/tests/illformed/atom10/entry_link_rel_other.xml +9 -0
- data/tests/illformed/atom10/entry_link_rel_related.xml +9 -0
- data/tests/illformed/atom10/entry_link_rel_self.xml +9 -0
- data/tests/illformed/atom10/entry_link_rel_via.xml +9 -0
- data/tests/illformed/atom10/entry_link_title.xml +9 -0
- data/tests/illformed/atom10/entry_link_type.xml +9 -0
- data/tests/illformed/atom10/entry_rights.xml +9 -0
- data/tests/illformed/atom10/entry_rights_content_value.xml +9 -0
- data/tests/illformed/atom10/entry_rights_escaped_markup.xml +9 -0
- data/tests/illformed/atom10/entry_rights_inline_markup.xml +9 -0
- data/tests/illformed/atom10/entry_rights_inline_markup_2.xml +9 -0
- data/tests/illformed/atom10/entry_rights_text_plain.xml +9 -0
- data/tests/illformed/atom10/entry_rights_text_plain_brackets.xml +9 -0
- data/tests/illformed/atom10/entry_rights_type_default.xml +9 -0
- data/tests/illformed/atom10/entry_rights_type_text.xml +9 -0
- data/tests/illformed/atom10/entry_source_author_email.xml +15 -0
- data/tests/illformed/atom10/entry_source_author_map_author.xml +15 -0
- data/tests/illformed/atom10/entry_source_author_map_author_2.xml +14 -0
- data/tests/illformed/atom10/entry_source_author_name.xml +15 -0
- data/tests/illformed/atom10/entry_source_author_uri.xml +15 -0
- data/tests/illformed/atom10/entry_source_category_label.xml +11 -0
- data/tests/illformed/atom10/entry_source_category_scheme.xml +11 -0
- data/tests/illformed/atom10/entry_source_category_term.xml +11 -0
- data/tests/illformed/atom10/entry_source_contributor_email.xml +15 -0
- data/tests/illformed/atom10/entry_source_contributor_multiple.xml +20 -0
- data/tests/illformed/atom10/entry_source_contributor_name.xml +15 -0
- data/tests/illformed/atom10/entry_source_contributor_uri.xml +15 -0
- data/tests/illformed/atom10/entry_source_generator.xml +11 -0
- data/tests/illformed/atom10/entry_source_generator_name.xml +11 -0
- data/tests/illformed/atom10/entry_source_generator_uri.xml +11 -0
- data/tests/illformed/atom10/entry_source_generator_version.xml +11 -0
- data/tests/illformed/atom10/entry_source_icon.xml +11 -0
- data/tests/illformed/atom10/entry_source_id.xml +11 -0
- data/tests/illformed/atom10/entry_source_link_alternate_map_link.xml +11 -0
- data/tests/illformed/atom10/entry_source_link_alternate_map_link_2.xml +11 -0
- data/tests/illformed/atom10/entry_source_link_href.xml +11 -0
- data/tests/illformed/atom10/entry_source_link_hreflang.xml +11 -0
- data/tests/illformed/atom10/entry_source_link_length.xml +11 -0
- data/tests/illformed/atom10/entry_source_link_multiple.xml +12 -0
- data/tests/illformed/atom10/entry_source_link_no_rel.xml +11 -0
- data/tests/illformed/atom10/entry_source_link_rel.xml +11 -0
- data/tests/illformed/atom10/entry_source_link_rel_other.xml +11 -0
- data/tests/illformed/atom10/entry_source_link_rel_related.xml +11 -0
- data/tests/illformed/atom10/entry_source_link_rel_self.xml +11 -0
- data/tests/illformed/atom10/entry_source_link_rel_via.xml +11 -0
- data/tests/illformed/atom10/entry_source_link_title.xml +11 -0
- data/tests/illformed/atom10/entry_source_link_type.xml +11 -0
- data/tests/illformed/atom10/entry_source_logo.xml +11 -0
- data/tests/illformed/atom10/entry_source_rights.xml +11 -0
- data/tests/illformed/atom10/entry_source_rights_base64.xml +13 -0
- data/tests/illformed/atom10/entry_source_rights_base64_2.xml +13 -0
- data/tests/illformed/atom10/entry_source_rights_content_type.xml +11 -0
- data/tests/illformed/atom10/entry_source_rights_content_type_text.xml +11 -0
- data/tests/illformed/atom10/entry_source_rights_content_value.xml +11 -0
- data/tests/illformed/atom10/entry_source_rights_escaped_markup.xml +11 -0
- data/tests/illformed/atom10/entry_source_rights_inline_markup.xml +11 -0
- data/tests/illformed/atom10/entry_source_rights_inline_markup_2.xml +11 -0
- data/tests/illformed/atom10/entry_source_rights_text_plain.xml +11 -0
- data/tests/illformed/atom10/entry_source_subittle_content_type_text.xml +11 -0
- data/tests/illformed/atom10/entry_source_subtitle.xml +11 -0
- data/tests/illformed/atom10/entry_source_subtitle_base64.xml +13 -0
- data/tests/illformed/atom10/entry_source_subtitle_base64_2.xml +13 -0
- data/tests/illformed/atom10/entry_source_subtitle_content_type.xml +11 -0
- data/tests/illformed/atom10/entry_source_subtitle_content_value.xml +11 -0
- data/tests/illformed/atom10/entry_source_subtitle_escaped_markup.xml +11 -0
- data/tests/illformed/atom10/entry_source_subtitle_inline_markup.xml +11 -0
- data/tests/illformed/atom10/entry_source_subtitle_inline_markup_2.xml +11 -0
- data/tests/illformed/atom10/entry_source_subtitle_text_plain.xml +11 -0
- data/tests/illformed/atom10/entry_source_title.xml +11 -0
- data/tests/illformed/atom10/entry_source_title_base64.xml +13 -0
- data/tests/illformed/atom10/entry_source_title_base64_2.xml +13 -0
- data/tests/illformed/atom10/entry_source_title_content_type.xml +11 -0
- data/tests/illformed/atom10/entry_source_title_content_type_text.xml +11 -0
- data/tests/illformed/atom10/entry_source_title_content_value.xml +11 -0
- data/tests/illformed/atom10/entry_source_title_escaped_markup.xml +11 -0
- data/tests/illformed/atom10/entry_source_title_inline_markup.xml +11 -0
- data/tests/illformed/atom10/entry_source_title_inline_markup_2.xml +11 -0
- data/tests/illformed/atom10/entry_source_title_text_plain.xml +11 -0
- data/tests/illformed/atom10/entry_summary.xml +9 -0
- data/tests/illformed/atom10/entry_summary_base64.xml +11 -0
- data/tests/illformed/atom10/entry_summary_base64_2.xml +11 -0
- data/tests/illformed/atom10/entry_summary_content_value.xml +9 -0
- data/tests/illformed/atom10/entry_summary_escaped_markup.xml +9 -0
- data/tests/illformed/atom10/entry_summary_inline_markup.xml +9 -0
- data/tests/illformed/atom10/entry_summary_inline_markup_2.xml +9 -0
- data/tests/illformed/atom10/entry_summary_text_plain.xml +9 -0
- data/tests/illformed/atom10/entry_summary_type_default.xml +9 -0
- data/tests/illformed/atom10/entry_summary_type_text.xml +9 -0
- data/tests/illformed/atom10/entry_title.xml +9 -0
- data/tests/illformed/atom10/entry_title_base64.xml +11 -0
- data/tests/illformed/atom10/entry_title_base64_2.xml +11 -0
- data/tests/illformed/atom10/entry_title_content_value.xml +9 -0
- data/tests/illformed/atom10/entry_title_escaped_markup.xml +9 -0
- data/tests/illformed/atom10/entry_title_inline_markup.xml +9 -0
- data/tests/illformed/atom10/entry_title_inline_markup_2.xml +9 -0
- data/tests/illformed/atom10/entry_title_text_plain.xml +9 -0
- data/tests/illformed/atom10/entry_title_text_plain_brackets.xml +9 -0
- data/tests/illformed/atom10/entry_title_type_default.xml +9 -0
- data/tests/illformed/atom10/entry_title_type_text.xml +9 -0
- data/tests/illformed/atom10/feed_author_email.xml +11 -0
- data/tests/illformed/atom10/feed_author_map_author.xml +11 -0
- data/tests/illformed/atom10/feed_author_map_author_2.xml +10 -0
- data/tests/illformed/atom10/feed_author_name.xml +11 -0
- data/tests/illformed/atom10/feed_author_uri.xml +11 -0
- data/tests/illformed/atom10/feed_author_url.xml +11 -0
- data/tests/illformed/atom10/feed_contributor_email.xml +11 -0
- data/tests/illformed/atom10/feed_contributor_multiple.xml +16 -0
- data/tests/illformed/atom10/feed_contributor_name.xml +11 -0
- data/tests/illformed/atom10/feed_contributor_uri.xml +11 -0
- data/tests/illformed/atom10/feed_contributor_url.xml +11 -0
- data/tests/illformed/atom10/feed_generator.xml +7 -0
- data/tests/illformed/atom10/feed_generator_name.xml +7 -0
- data/tests/illformed/atom10/feed_generator_url.xml +7 -0
- data/tests/illformed/atom10/feed_generator_version.xml +7 -0
- data/tests/illformed/atom10/feed_icon.xml +7 -0
- data/tests/illformed/atom10/feed_id.xml +7 -0
- data/tests/illformed/atom10/feed_id_map_guid.xml +7 -0
- data/tests/illformed/atom10/feed_link_alternate_map_link.xml +7 -0
- data/tests/illformed/atom10/feed_link_alternate_map_link_2.xml +7 -0
- data/tests/illformed/atom10/feed_link_href.xml +7 -0
- data/tests/illformed/atom10/feed_link_hreflang.xml +7 -0
- data/tests/illformed/atom10/feed_link_length.xml +7 -0
- data/tests/illformed/atom10/feed_link_multiple.xml +8 -0
- data/tests/illformed/atom10/feed_link_no_rel.xml +7 -0
- data/tests/illformed/atom10/feed_link_rel.xml +7 -0
- data/tests/illformed/atom10/feed_link_rel_other.xml +7 -0
- data/tests/illformed/atom10/feed_link_rel_related.xml +7 -0
- data/tests/illformed/atom10/feed_link_rel_self.xml +7 -0
- data/tests/illformed/atom10/feed_link_rel_via.xml +7 -0
- data/tests/illformed/atom10/feed_link_title.xml +7 -0
- data/tests/illformed/atom10/feed_link_type.xml +7 -0
- data/tests/illformed/atom10/feed_logo.xml +7 -0
- data/tests/illformed/atom10/feed_rights.xml +7 -0
- data/tests/illformed/atom10/feed_rights_base64.xml +9 -0
- data/tests/illformed/atom10/feed_rights_base64_2.xml +9 -0
- data/tests/illformed/atom10/feed_rights_content_type.xml +7 -0
- data/tests/illformed/atom10/feed_rights_content_type_text.xml +7 -0
- data/tests/illformed/atom10/feed_rights_content_value.xml +7 -0
- data/tests/illformed/atom10/feed_rights_escaped_markup.xml +7 -0
- data/tests/illformed/atom10/feed_rights_inline_markup.xml +7 -0
- data/tests/illformed/atom10/feed_rights_inline_markup_2.xml +7 -0
- data/tests/illformed/atom10/feed_rights_text_plain.xml +7 -0
- data/tests/illformed/atom10/feed_subtitle.xml +7 -0
- data/tests/illformed/atom10/feed_subtitle_base64.xml +9 -0
- data/tests/illformed/atom10/feed_subtitle_base64_2.xml +9 -0
- data/tests/illformed/atom10/feed_subtitle_content_type.xml +7 -0
- data/tests/illformed/atom10/feed_subtitle_content_type_text.xml +7 -0
- data/tests/illformed/atom10/feed_subtitle_content_value.xml +7 -0
- data/tests/illformed/atom10/feed_subtitle_escaped_markup.xml +7 -0
- data/tests/illformed/atom10/feed_subtitle_inline_markup.xml +7 -0
- data/tests/illformed/atom10/feed_subtitle_inline_markup_2.xml +7 -0
- data/tests/illformed/atom10/feed_subtitle_text_plain.xml +7 -0
- data/tests/illformed/atom10/feed_title.xml +7 -0
- data/tests/illformed/atom10/feed_title_base64.xml +9 -0
- data/tests/illformed/atom10/feed_title_base64_2.xml +9 -0
- data/tests/illformed/atom10/feed_title_content_type.xml +7 -0
- data/tests/illformed/atom10/feed_title_content_type_text.xml +7 -0
- data/tests/illformed/atom10/feed_title_content_value.xml +7 -0
- data/tests/illformed/atom10/feed_title_escaped_markup.xml +7 -0
- data/tests/illformed/atom10/feed_title_inline_markup.xml +7 -0
- data/tests/illformed/atom10/feed_title_inline_markup_2.xml +7 -0
- data/tests/illformed/atom10/feed_title_text_plain.xml +7 -0
- data/tests/illformed/atom10/relative_uri.xml +7 -0
- data/tests/illformed/atom10/relative_uri_inherit.xml +7 -0
- data/tests/illformed/atom10/relative_uri_inherit_2.xml +7 -0
- data/tests/illformed/base/cdf_item_abstract_xml_base.xml +18 -0
- data/tests/illformed/base/entry_content_xml_base.xml +9 -0
- data/tests/illformed/base/entry_content_xml_base_inherit.xml +9 -0
- data/tests/illformed/base/entry_content_xml_base_inherit_2.xml +9 -0
- data/tests/illformed/base/entry_content_xml_base_inherit_3.xml +10 -0
- data/tests/illformed/base/entry_content_xml_base_inherit_4.xml +10 -0
- data/tests/illformed/base/entry_summary_xml_base.xml +9 -0
- data/tests/illformed/base/entry_summary_xml_base_inherit.xml +9 -0
- data/tests/illformed/base/entry_summary_xml_base_inherit_2.xml +9 -0
- data/tests/illformed/base/entry_summary_xml_base_inherit_3.xml +10 -0
- data/tests/illformed/base/entry_summary_xml_base_inherit_4.xml +10 -0
- data/tests/illformed/base/entry_title_xml_base.xml +9 -0
- data/tests/illformed/base/entry_title_xml_base_inherit.xml +9 -0
- data/tests/illformed/base/entry_title_xml_base_inherit_2.xml +9 -0
- data/tests/illformed/base/entry_title_xml_base_inherit_3.xml +10 -0
- data/tests/illformed/base/entry_title_xml_base_inherit_4.xml +10 -0
- data/tests/illformed/base/feed_copyright_xml_base.xml +7 -0
- data/tests/illformed/base/feed_copyright_xml_base_inherit.xml +7 -0
- data/tests/illformed/base/feed_copyright_xml_base_inherit_2.xml +7 -0
- data/tests/illformed/base/feed_copyright_xml_base_inherit_3.xml +8 -0
- data/tests/illformed/base/feed_copyright_xml_base_inherit_4.xml +8 -0
- data/tests/illformed/base/feed_info_xml_base.xml +7 -0
- data/tests/illformed/base/feed_info_xml_base_inherit.xml +7 -0
- data/tests/illformed/base/feed_info_xml_base_inherit_2.xml +7 -0
- data/tests/illformed/base/feed_info_xml_base_inherit_3.xml +8 -0
- data/tests/illformed/base/feed_info_xml_base_inherit_4.xml +8 -0
- data/tests/illformed/base/feed_tagline_xml_base.xml +7 -0
- data/tests/illformed/base/feed_tagline_xml_base_inherit.xml +7 -0
- data/tests/illformed/base/feed_tagline_xml_base_inherit_2.xml +7 -0
- data/tests/illformed/base/feed_tagline_xml_base_inherit_3.xml +8 -0
- data/tests/illformed/base/feed_tagline_xml_base_inherit_4.xml +8 -0
- data/tests/illformed/base/feed_title_xml_base.xml +7 -0
- data/tests/illformed/base/feed_title_xml_base_inherit.xml +7 -0
- data/tests/illformed/base/feed_title_xml_base_inherit_2.xml +7 -0
- data/tests/illformed/base/feed_title_xml_base_inherit_3.xml +8 -0
- data/tests/illformed/base/feed_title_xml_base_inherit_4.xml +8 -0
- data/tests/illformed/base/http_channel_docs_base_content_location.xml +10 -0
- data/tests/illformed/base/http_channel_docs_base_docuri.xml +9 -0
- data/tests/illformed/base/http_channel_link_base_content_location.xml +10 -0
- data/tests/illformed/base/http_channel_link_base_docuri.xml +9 -0
- data/tests/illformed/base/http_entry_author_url_base_content_location.xml +12 -0
- data/tests/illformed/base/http_entry_author_url_base_docuri.xml +11 -0
- data/tests/illformed/base/http_entry_content_base64_base_content_location.xml +12 -0
- data/tests/illformed/base/http_entry_content_base64_base_docuri.xml +11 -0
- data/tests/illformed/base/http_entry_content_base_content_location.xml +10 -0
- data/tests/illformed/base/http_entry_content_base_docuri.xml +9 -0
- data/tests/illformed/base/http_entry_content_inline_base_content_location.xml +10 -0
- data/tests/illformed/base/http_entry_content_inline_base_docuri.xml +9 -0
- data/tests/illformed/base/http_entry_contributor_url_base_content_location.xml +12 -0
- data/tests/illformed/base/http_entry_contributor_url_base_docuri.xml +11 -0
- data/tests/illformed/base/http_entry_id_base_content_location.xml +10 -0
- data/tests/illformed/base/http_entry_id_base_docuri.xml +9 -0
- data/tests/illformed/base/http_entry_link_base_content_location.xml +10 -0
- data/tests/illformed/base/http_entry_link_base_docuri.xml +9 -0
- data/tests/illformed/base/http_entry_summary_base64_base_content_location.xml +12 -0
- data/tests/illformed/base/http_entry_summary_base64_base_docuri.xml +11 -0
- data/tests/illformed/base/http_entry_summary_base_content_location.xml +10 -0
- data/tests/illformed/base/http_entry_summary_base_docuri.xml +9 -0
- data/tests/illformed/base/http_entry_summary_inline_base_content_location.xml +10 -0
- data/tests/illformed/base/http_entry_summary_inline_base_docuri.xml +9 -0
- data/tests/illformed/base/http_entry_title_base64_base_content_location.xml +12 -0
- data/tests/illformed/base/http_entry_title_base64_base_docuri.xml +11 -0
- data/tests/illformed/base/http_entry_title_base_content_location.xml +10 -0
- data/tests/illformed/base/http_entry_title_base_docuri.xml +9 -0
- data/tests/illformed/base/http_entry_title_inline_base_content_location.xml +10 -0
- data/tests/illformed/base/http_entry_title_inline_base_docuri.xml +9 -0
- data/tests/illformed/base/http_feed_author_url_base_content_location.xml +10 -0
- data/tests/illformed/base/http_feed_author_url_base_docuri.xml +9 -0
- data/tests/illformed/base/http_feed_contributor_url_base_content_location.xml +10 -0
- data/tests/illformed/base/http_feed_contributor_url_base_docuri.xml +9 -0
- data/tests/illformed/base/http_feed_copyright_base64_base_content_location.xml +10 -0
- data/tests/illformed/base/http_feed_copyright_base64_base_docuri.xml +9 -0
- data/tests/illformed/base/http_feed_copyright_base_content_location.xml +8 -0
- data/tests/illformed/base/http_feed_copyright_base_docuri.xml +7 -0
- data/tests/illformed/base/http_feed_copyright_inline_base_content_location.xml +8 -0
- data/tests/illformed/base/http_feed_copyright_inline_base_docuri.xml +7 -0
- data/tests/illformed/base/http_feed_generator_url_base_content_location.xml +8 -0
- data/tests/illformed/base/http_feed_generator_url_base_docuri.xml +7 -0
- data/tests/illformed/base/http_feed_id_base_content_location.xml +8 -0
- data/tests/illformed/base/http_feed_id_base_docuri.xml +7 -0
- data/tests/illformed/base/http_feed_info_base64_base_content_location.xml +10 -0
- data/tests/illformed/base/http_feed_info_base64_base_docuri.xml +9 -0
- data/tests/illformed/base/http_feed_info_base_content_location.xml +8 -0
- data/tests/illformed/base/http_feed_info_base_docuri.xml +7 -0
- data/tests/illformed/base/http_feed_info_inline_base_content_location.xml +8 -0
- data/tests/illformed/base/http_feed_info_inline_base_docuri.xml +7 -0
- data/tests/illformed/base/http_feed_link_base_content_location.xml +8 -0
- data/tests/illformed/base/http_feed_link_base_docuri.xml +7 -0
- data/tests/illformed/base/http_feed_tagline_base64_base_content_location.xml +10 -0
- data/tests/illformed/base/http_feed_tagline_base64_base_docuri.xml +9 -0
- data/tests/illformed/base/http_feed_tagline_base_content_location.xml +8 -0
- data/tests/illformed/base/http_feed_tagline_base_docuri.xml +7 -0
- data/tests/illformed/base/http_feed_tagline_inline_base_content_location.xml +8 -0
- data/tests/illformed/base/http_feed_tagline_inline_base_docuri.xml +7 -0
- data/tests/illformed/base/http_feed_title_base64_base_content_location.xml +10 -0
- data/tests/illformed/base/http_feed_title_base64_base_docuri.xml +9 -0
- data/tests/illformed/base/http_feed_title_base_content_location.xml +8 -0
- data/tests/illformed/base/http_feed_title_base_docuri.xml +7 -0
- data/tests/illformed/base/http_feed_title_inline_base_content_location.xml +8 -0
- data/tests/illformed/base/http_feed_title_inline_base_docuri.xml +7 -0
- data/tests/illformed/base/http_item_body_base_content_location.xml +12 -0
- data/tests/illformed/base/http_item_body_base_docuri.xml +11 -0
- data/tests/illformed/base/http_item_comments_base_content_location.xml +12 -0
- data/tests/illformed/base/http_item_comments_base_docuri.xml +11 -0
- data/tests/illformed/base/http_item_content_encoded_base_content_location.xml +12 -0
- data/tests/illformed/base/http_item_content_encoded_base_docuri.xml +11 -0
- data/tests/illformed/base/http_item_description_base_content_location.xml +12 -0
- data/tests/illformed/base/http_item_description_base_docuri.xml +11 -0
- data/tests/illformed/base/http_item_fullitem_base_content_location.xml +12 -0
- data/tests/illformed/base/http_item_fullitem_base_docuri.xml +11 -0
- data/tests/illformed/base/http_item_link_base_content_location.xml +12 -0
- data/tests/illformed/base/http_item_link_base_docuri.xml +11 -0
- data/tests/illformed/base/http_item_wfw_commentRSS_base_content_location.xml +12 -0
- data/tests/illformed/base/http_item_wfw_commentRSS_base_docuri.xml +11 -0
- data/tests/illformed/base/http_item_wfw_comment_base_content_location.xml +12 -0
- data/tests/illformed/base/http_item_wfw_comment_base_docuri.xml +11 -0
- data/tests/illformed/base/http_item_xhtml_body_base_content_location.xml +12 -0
- data/tests/illformed/base/http_item_xhtml_body_base_docuri.xml +11 -0
- data/tests/illformed/base/http_relative_xml_base.xml +10 -0
- data/tests/illformed/base/malformed_base.xml +9 -0
- data/tests/illformed/base/relative_xml_base.xml +9 -0
- data/tests/illformed/base/relative_xml_base_2.xml +9 -0
- data/tests/illformed/cdf/channel_abstract_map_description.xml +7 -0
- data/tests/illformed/cdf/channel_abstract_map_tagline.xml +7 -0
- data/tests/illformed/cdf/channel_href_map_link.xml +6 -0
- data/tests/illformed/cdf/channel_href_map_links.xml +6 -0
- data/tests/illformed/cdf/channel_title.xml +7 -0
- data/tests/illformed/cdf/item_abstract_map_description.xml +9 -0
- data/tests/illformed/cdf/item_abstract_map_summary.xml +9 -0
- data/tests/illformed/cdf/item_href_map_link.xml +8 -0
- data/tests/illformed/cdf/item_href_map_links.xml +8 -0
- data/tests/illformed/cdf/item_title.xml +9 -0
- data/tests/illformed/chardet/big5.xml +8 -0
- data/tests/illformed/chardet/eucjp.xml +13 -0
- data/tests/illformed/chardet/euckr.xml +13 -0
- data/tests/illformed/chardet/gb2312.xml +12 -0
- data/tests/illformed/chardet/koi8r.xml +14 -0
- data/tests/illformed/chardet/shiftjis.xml +11 -0
- data/tests/illformed/chardet/tis620.xml +12 -0
- data/tests/illformed/chardet/windows1255.xml +14 -0
- data/tests/illformed/date/cdf_channel_lastmod_map_date.xml +6 -0
- data/tests/illformed/date/cdf_channel_lastmod_map_modified.xml +6 -0
- data/tests/illformed/date/cdf_channel_lastmod_map_modified_parsed.xml +6 -0
- data/tests/illformed/date/cdf_item_lastmod_map_date.xml +8 -0
- data/tests/illformed/date/cdf_item_lastmod_map_modified.xml +8 -0
- data/tests/illformed/date/cdf_item_lastmod_map_modified_parsed.xml +8 -0
- data/tests/illformed/date/channel_dc_date.xml +9 -0
- data/tests/illformed/date/channel_dc_date_map_modified.xml +9 -0
- data/tests/illformed/date/channel_dc_date_w3dtf_utc.xml +9 -0
- data/tests/illformed/date/channel_dc_date_w3dtf_utc_map_modified_parsed.xml +9 -0
- data/tests/illformed/date/channel_dcterms_created.xml +9 -0
- data/tests/illformed/date/channel_dcterms_created_w3dtf_utc.xml +9 -0
- data/tests/illformed/date/channel_dcterms_issued.xml +9 -0
- data/tests/illformed/date/channel_dcterms_issued_w3dtf_utc.xml +9 -0
- data/tests/illformed/date/channel_dcterms_modified.xml +9 -0
- data/tests/illformed/date/channel_dcterms_modified_map_date.xml +9 -0
- data/tests/illformed/date/channel_dcterms_modified_w3dtf_utc.xml +9 -0
- data/tests/illformed/date/channel_dcterms_modified_w3dtf_utc_map_date.xml +9 -0
- data/tests/illformed/date/channel_pubDate.xml +9 -0
- data/tests/illformed/date/channel_pubDate_asctime.xml +9 -0
- data/tests/illformed/date/channel_pubDate_disney.xml +9 -0
- data/tests/illformed/date/channel_pubDate_disney_at.xml +9 -0
- data/tests/illformed/date/channel_pubDate_disney_ct.xml +9 -0
- data/tests/illformed/date/channel_pubDate_disney_mt.xml +9 -0
- data/tests/illformed/date/channel_pubDate_disney_pt.xml +9 -0
- data/tests/illformed/date/channel_pubDate_greek_1.xml +9 -0
- data/tests/illformed/date/channel_pubDate_hungarian_1.xml +9 -0
- data/tests/illformed/date/channel_pubDate_iso8601_ym.xml +9 -0
- data/tests/illformed/date/channel_pubDate_iso8601_ym_2.xml +9 -0
- data/tests/illformed/date/channel_pubDate_iso8601_ymd.xml +9 -0
- data/tests/illformed/date/channel_pubDate_iso8601_ymd_2.xml +9 -0
- data/tests/illformed/date/channel_pubDate_iso8601_yo_2.xml +9 -0
- data/tests/illformed/date/channel_pubDate_korean_nate.xml +11 -0
- data/tests/illformed/date/channel_pubDate_map_modified.xml +9 -0
- data/tests/illformed/date/channel_pubDate_mssql.xml +9 -0
- data/tests/illformed/date/channel_pubDate_mssql_nofraction.xml +9 -0
- data/tests/illformed/date/channel_pubDate_nosecond.xml +9 -0
- data/tests/illformed/date/channel_pubDate_notime.xml +9 -0
- data/tests/illformed/date/channel_pubDate_rfc2822.xml +9 -0
- data/tests/illformed/date/channel_pubDate_rfc2822_rollover_june_31.xml +9 -0
- data/tests/illformed/date/channel_pubDate_rfc822.xml +9 -0
- data/tests/illformed/date/channel_pubDate_w3dtf_rollover_61m.xml +9 -0
- data/tests/illformed/date/channel_pubDate_w3dtf_rollover_61s.xml +9 -0
- data/tests/illformed/date/channel_pubDate_w3dtf_rollover_leapyear.xml +9 -0
- data/tests/illformed/date/channel_pubDate_w3dtf_rollover_leapyear400.xml +9 -0
- data/tests/illformed/date/channel_pubDate_w3dtf_rollover_nonleapyear.xml +9 -0
- data/tests/illformed/date/channel_pubDate_w3dtf_sf.xml +9 -0
- data/tests/illformed/date/channel_pubDate_w3dtf_tokyo.xml +9 -0
- data/tests/illformed/date/channel_pubDate_w3dtf_utc.xml +9 -0
- data/tests/illformed/date/channel_pubDate_w3dtf_y.xml +9 -0
- data/tests/illformed/date/channel_pubDate_w3dtf_ym.xml +9 -0
- data/tests/illformed/date/channel_pubDate_w3dtf_ymd.xml +9 -0
- data/tests/illformed/date/channel_pubDate_w3dtf_ymd_2.xml +9 -0
- data/tests/illformed/date/entry_created.xml +9 -0
- data/tests/illformed/date/entry_created_w3dtf_utc.xml +9 -0
- data/tests/illformed/date/entry_issued.xml +9 -0
- data/tests/illformed/date/entry_issued_w3dtf_utc.xml +9 -0
- data/tests/illformed/date/entry_modified.xml +9 -0
- data/tests/illformed/date/entry_modified_map_date.xml +9 -0
- data/tests/illformed/date/entry_modified_w3dtf_utc.xml +9 -0
- data/tests/illformed/date/entry_published_w3dtf_utc.xml +9 -0
- data/tests/illformed/date/entry_source_updated_w3dtf_utc.xml +11 -0
- data/tests/illformed/date/entry_updated_w3dtf_utc.xml +9 -0
- data/tests/illformed/date/feed_modified.xml +9 -0
- data/tests/illformed/date/feed_modified_asctime.xml +9 -0
- data/tests/illformed/date/feed_modified_disney.xml +7 -0
- data/tests/illformed/date/feed_modified_disney_at.xml +7 -0
- data/tests/illformed/date/feed_modified_disney_ct.xml +7 -0
- data/tests/illformed/date/feed_modified_disney_mt.xml +7 -0
- data/tests/illformed/date/feed_modified_disney_pt.xml +7 -0
- data/tests/illformed/date/feed_modified_iso8601_ym.xml +9 -0
- data/tests/illformed/date/feed_modified_iso8601_ym_2.xml +9 -0
- data/tests/illformed/date/feed_modified_iso8601_ymd.xml +9 -0
- data/tests/illformed/date/feed_modified_iso8601_ymd_2.xml +9 -0
- data/tests/illformed/date/feed_modified_iso8601_yo_2.xml +9 -0
- data/tests/illformed/date/feed_modified_map_date.xml +9 -0
- data/tests/illformed/date/feed_modified_rfc2822.xml +9 -0
- data/tests/illformed/date/feed_modified_rfc2822_rollover_june_31.xml +9 -0
- data/tests/illformed/date/feed_modified_rfc822.xml +9 -0
- data/tests/illformed/date/feed_modified_w3dtf_rollover_leapyear.xml +9 -0
- data/tests/illformed/date/feed_modified_w3dtf_rollover_leapyear400.xml +9 -0
- data/tests/illformed/date/feed_modified_w3dtf_rollover_nonleapyear.xml +9 -0
- data/tests/illformed/date/feed_modified_w3dtf_sf.xml +9 -0
- data/tests/illformed/date/feed_modified_w3dtf_tokyo.xml +9 -0
- data/tests/illformed/date/feed_modified_w3dtf_utc.xml +9 -0
- data/tests/illformed/date/feed_modified_w3dtf_y.xml +9 -0
- data/tests/illformed/date/feed_modified_w3dtf_ym.xml +9 -0
- data/tests/illformed/date/feed_modified_w3dtf_ymd.xml +9 -0
- data/tests/illformed/date/feed_modified_w3dtf_ymd_2.xml +9 -0
- data/tests/illformed/date/feed_updated_w3dtf_utc.xml +7 -0
- data/tests/illformed/date/http_high_bit_date.xml +12 -0
- data/tests/illformed/date/item_dc_date.xml +11 -0
- data/tests/illformed/date/item_dc_date_map_modified.xml +11 -0
- data/tests/illformed/date/item_dc_date_w3dtf_utc.xml +11 -0
- data/tests/illformed/date/item_dc_date_w3dtf_utc_map_modified_parsed.xml +11 -0
- data/tests/illformed/date/item_dcterms_created.xml +11 -0
- data/tests/illformed/date/item_dcterms_created_w3dtf_utc.xml +11 -0
- data/tests/illformed/date/item_dcterms_issued.xml +11 -0
- data/tests/illformed/date/item_dcterms_issued_w3dtf_utc.xml +11 -0
- data/tests/illformed/date/item_dcterms_modified.xml +11 -0
- data/tests/illformed/date/item_dcterms_modified_map_date.xml +11 -0
- data/tests/illformed/date/item_dcterms_modified_w3dtf_utc.xml +11 -0
- data/tests/illformed/date/item_dcterms_modified_w3dtf_utc_map_date.xml +11 -0
- data/tests/illformed/date/item_expirationDate.xml +11 -0
- data/tests/illformed/date/item_expirationDate_rfc2822.xml +11 -0
- data/tests/illformed/date/item_pubDate.xml +11 -0
- data/tests/illformed/date/item_pubDate_euc-kr.xml +13 -0
- data/tests/illformed/date/item_pubDate_map_modified.xml +11 -0
- data/tests/illformed/date/item_pubDate_rfc2822.xml +11 -0
- data/tests/illformed/encoding/bogus_encoding.xml +7 -0
- data/tests/illformed/encoding/encoding_mismatch_crash.xml +10 -0
- data/tests/illformed/encoding/http_i18n.xml +13 -0
- data/tests/illformed/encoding/http_text_plain.xml +8 -0
- data/tests/illformed/encoding/http_text_plain_charset.xml +8 -0
- data/tests/illformed/encoding/utf-16be-autodetect.xml +0 -0
- data/tests/illformed/encoding/utf-16be-bom.xml +0 -0
- data/tests/illformed/encoding/utf-16be.xml +0 -0
- data/tests/illformed/encoding/utf-16le-autodetect.xml +0 -0
- data/tests/illformed/encoding/utf-16le-bom.xml +0 -0
- data/tests/illformed/encoding/utf-16le.xml +0 -0
- data/tests/illformed/encoding/utf-32be-autodetect.xml +0 -0
- data/tests/illformed/encoding/utf-32be-bom.xml +0 -0
- data/tests/illformed/encoding/utf-32be.xml +0 -0
- data/tests/illformed/encoding/utf-32le-autodetect.xml +0 -0
- data/tests/illformed/encoding/utf-32le-bom.xml +0 -0
- data/tests/illformed/encoding/utf-32le.xml +0 -0
- data/tests/illformed/encoding/utf-8-bom.xml +8 -0
- data/tests/illformed/encoding/x80_437.xml +9 -0
- data/tests/illformed/encoding/x80_850.xml +9 -0
- data/tests/illformed/encoding/x80_852.xml +9 -0
- data/tests/illformed/encoding/x80_855.xml +9 -0
- data/tests/illformed/encoding/x80_857.xml +9 -0
- data/tests/illformed/encoding/x80_860.xml +9 -0
- data/tests/illformed/encoding/x80_861.xml +9 -0
- data/tests/illformed/encoding/x80_862.xml +9 -0
- data/tests/illformed/encoding/x80_863.xml +9 -0
- data/tests/illformed/encoding/x80_865.xml +9 -0
- data/tests/illformed/encoding/x80_866.xml +9 -0
- data/tests/illformed/encoding/x80_cp037.xml +1 -0
- data/tests/illformed/encoding/x80_cp1125.xml +9 -0
- data/tests/illformed/encoding/x80_cp1250.xml +9 -0
- data/tests/illformed/encoding/x80_cp1251.xml +9 -0
- data/tests/illformed/encoding/x80_cp1252.xml +9 -0
- data/tests/illformed/encoding/x80_cp1253.xml +9 -0
- data/tests/illformed/encoding/x80_cp1254.xml +9 -0
- data/tests/illformed/encoding/x80_cp1255.xml +9 -0
- data/tests/illformed/encoding/x80_cp1256.xml +9 -0
- data/tests/illformed/encoding/x80_cp1257.xml +9 -0
- data/tests/illformed/encoding/x80_cp1258.xml +9 -0
- data/tests/illformed/encoding/x80_cp437.xml +9 -0
- data/tests/illformed/encoding/x80_cp500.xml +1 -0
- data/tests/illformed/encoding/x80_cp737.xml +9 -0
- data/tests/illformed/encoding/x80_cp775.xml +9 -0
- data/tests/illformed/encoding/x80_cp850.xml +9 -0
- data/tests/illformed/encoding/x80_cp852.xml +9 -0
- data/tests/illformed/encoding/x80_cp855.xml +9 -0
- data/tests/illformed/encoding/x80_cp856.xml +9 -0
- data/tests/illformed/encoding/x80_cp857.xml +9 -0
- data/tests/illformed/encoding/x80_cp860.xml +9 -0
- data/tests/illformed/encoding/x80_cp861.xml +9 -0
- data/tests/illformed/encoding/x80_cp862.xml +9 -0
- data/tests/illformed/encoding/x80_cp863.xml +9 -0
- data/tests/illformed/encoding/x80_cp864.xml +9 -0
- data/tests/illformed/encoding/x80_cp865.xml +9 -0
- data/tests/illformed/encoding/x80_cp866.xml +9 -0
- data/tests/illformed/encoding/x80_cp874.xml +9 -0
- data/tests/illformed/encoding/x80_cp875.xml +1 -0
- data/tests/illformed/encoding/x80_cp_is.xml +9 -0
- data/tests/illformed/encoding/x80_csibm037.xml +1 -0
- data/tests/illformed/encoding/x80_csibm500.xml +1 -0
- data/tests/illformed/encoding/x80_csibm855.xml +9 -0
- data/tests/illformed/encoding/x80_csibm857.xml +9 -0
- data/tests/illformed/encoding/x80_csibm860.xml +9 -0
- data/tests/illformed/encoding/x80_csibm861.xml +9 -0
- data/tests/illformed/encoding/x80_csibm863.xml +9 -0
- data/tests/illformed/encoding/x80_csibm864.xml +9 -0
- data/tests/illformed/encoding/x80_csibm865.xml +9 -0
- data/tests/illformed/encoding/x80_csibm866.xml +9 -0
- data/tests/illformed/encoding/x80_cskoi8r.xml +9 -0
- data/tests/illformed/encoding/x80_csmacintosh.xml +9 -0
- data/tests/illformed/encoding/x80_cspc775baltic.xml +9 -0
- data/tests/illformed/encoding/x80_cspc850multilingual.xml +9 -0
- data/tests/illformed/encoding/x80_cspc862latinhebrew.xml +9 -0
- data/tests/illformed/encoding/x80_cspc8codepage437.xml +9 -0
- data/tests/illformed/encoding/x80_cspcp852.xml +9 -0
- data/tests/illformed/encoding/x80_dbcs.xml +9 -0
- data/tests/illformed/encoding/x80_ebcdic-cp-be.xml +1 -0
- data/tests/illformed/encoding/x80_ebcdic-cp-ca.xml +1 -0
- data/tests/illformed/encoding/x80_ebcdic-cp-ch.xml +1 -0
- data/tests/illformed/encoding/x80_ebcdic-cp-nl.xml +1 -0
- data/tests/illformed/encoding/x80_ebcdic-cp-us.xml +1 -0
- data/tests/illformed/encoding/x80_ebcdic-cp-wt.xml +1 -0
- data/tests/illformed/encoding/x80_ebcdic_cp_be.xml +1 -0
- data/tests/illformed/encoding/x80_ebcdic_cp_ca.xml +1 -0
- data/tests/illformed/encoding/x80_ebcdic_cp_ch.xml +1 -0
- data/tests/illformed/encoding/x80_ebcdic_cp_nl.xml +1 -0
- data/tests/illformed/encoding/x80_ebcdic_cp_us.xml +1 -0
- data/tests/illformed/encoding/x80_ebcdic_cp_wt.xml +1 -0
- data/tests/illformed/encoding/x80_ibm037.xml +1 -0
- data/tests/illformed/encoding/x80_ibm039.xml +1 -0
- data/tests/illformed/encoding/x80_ibm1140.xml +1 -0
- data/tests/illformed/encoding/x80_ibm437.xml +9 -0
- data/tests/illformed/encoding/x80_ibm500.xml +1 -0
- data/tests/illformed/encoding/x80_ibm775.xml +9 -0
- data/tests/illformed/encoding/x80_ibm850.xml +9 -0
- data/tests/illformed/encoding/x80_ibm852.xml +9 -0
- data/tests/illformed/encoding/x80_ibm855.xml +9 -0
- data/tests/illformed/encoding/x80_ibm857.xml +9 -0
- data/tests/illformed/encoding/x80_ibm860.xml +9 -0
- data/tests/illformed/encoding/x80_ibm861.xml +9 -0
- data/tests/illformed/encoding/x80_ibm862.xml +9 -0
- data/tests/illformed/encoding/x80_ibm863.xml +9 -0
- data/tests/illformed/encoding/x80_ibm864.xml +9 -0
- data/tests/illformed/encoding/x80_ibm865.xml +9 -0
- data/tests/illformed/encoding/x80_ibm866.xml +9 -0
- data/tests/illformed/encoding/x80_koi8-r.xml +9 -0
- data/tests/illformed/encoding/x80_koi8-t.xml +9 -0
- data/tests/illformed/encoding/x80_koi8-u.xml +9 -0
- data/tests/illformed/encoding/x80_mac-cyrillic.xml +9 -0
- data/tests/illformed/encoding/x80_mac.xml +9 -0
- data/tests/illformed/encoding/x80_maccentraleurope.xml +9 -0
- data/tests/illformed/encoding/x80_maccyrillic.xml +9 -0
- data/tests/illformed/encoding/x80_macgreek.xml +9 -0
- data/tests/illformed/encoding/x80_maciceland.xml +9 -0
- data/tests/illformed/encoding/x80_macintosh.xml +9 -0
- data/tests/illformed/encoding/x80_maclatin2.xml +9 -0
- data/tests/illformed/encoding/x80_macroman.xml +9 -0
- data/tests/illformed/encoding/x80_macturkish.xml +9 -0
- data/tests/illformed/encoding/x80_ms-ansi.xml +9 -0
- data/tests/illformed/encoding/x80_ms-arab.xml +9 -0
- data/tests/illformed/encoding/x80_ms-cyrl.xml +9 -0
- data/tests/illformed/encoding/x80_ms-ee.xml +9 -0
- data/tests/illformed/encoding/x80_ms-greek.xml +9 -0
- data/tests/illformed/encoding/x80_ms-hebr.xml +9 -0
- data/tests/illformed/encoding/x80_ms-turk.xml +9 -0
- data/tests/illformed/encoding/x80_tcvn-5712.xml +9 -0
- data/tests/illformed/encoding/x80_tcvn.xml +9 -0
- data/tests/illformed/encoding/x80_tcvn5712-1.xml +9 -0
- data/tests/illformed/encoding/x80_viscii.xml +9 -0
- data/tests/illformed/encoding/x80_winbaltrim.xml +9 -0
- data/tests/illformed/encoding/x80_windows-1250.xml +9 -0
- data/tests/illformed/encoding/x80_windows-1251.xml +9 -0
- data/tests/illformed/encoding/x80_windows-1252.xml +9 -0
- data/tests/illformed/encoding/x80_windows-1253.xml +9 -0
- data/tests/illformed/encoding/x80_windows-1254.xml +9 -0
- data/tests/illformed/encoding/x80_windows-1255.xml +9 -0
- data/tests/illformed/encoding/x80_windows-1256.xml +9 -0
- data/tests/illformed/encoding/x80_windows-1257.xml +9 -0
- data/tests/illformed/encoding/x80_windows-1258.xml +9 -0
- data/tests/illformed/encoding/x80_windows_1250.xml +9 -0
- data/tests/illformed/encoding/x80_windows_1251.xml +9 -0
- data/tests/illformed/encoding/x80_windows_1252.xml +9 -0
- data/tests/illformed/encoding/x80_windows_1253.xml +9 -0
- data/tests/illformed/encoding/x80_windows_1254.xml +9 -0
- data/tests/illformed/encoding/x80_windows_1255.xml +9 -0
- data/tests/illformed/encoding/x80_windows_1256.xml +9 -0
- data/tests/illformed/encoding/x80_windows_1257.xml +9 -0
- data/tests/illformed/encoding/x80_windows_1258.xml +9 -0
- data/tests/illformed/entities/160.xml +9 -0
- data/tests/illformed/entities/732.xml +9 -0
- data/tests/illformed/entities/8216.xml +9 -0
- data/tests/illformed/entities/8217.xml +9 -0
- data/tests/illformed/entities/8220.xml +9 -0
- data/tests/illformed/entities/8221.xml +9 -0
- data/tests/illformed/entities/9830.xml +9 -0
- data/tests/illformed/entities/aacute.xml +9 -0
- data/tests/illformed/entities/acirc.xml +9 -0
- data/tests/illformed/entities/acute.xml +9 -0
- data/tests/illformed/entities/aelig.xml +9 -0
- data/tests/illformed/entities/agrave.xml +9 -0
- data/tests/illformed/entities/alefsym.xml +9 -0
- data/tests/illformed/entities/alpha.xml +9 -0
- data/tests/illformed/entities/and.xml +9 -0
- data/tests/illformed/entities/ang.xml +9 -0
- data/tests/illformed/entities/aring.xml +9 -0
- data/tests/illformed/entities/asymp.xml +9 -0
- data/tests/illformed/entities/atilde.xml +9 -0
- data/tests/illformed/entities/auml.xml +9 -0
- data/tests/illformed/entities/bdquo.xml +9 -0
- data/tests/illformed/entities/beta.xml +9 -0
- data/tests/illformed/entities/brvbar.xml +9 -0
- data/tests/illformed/entities/bull.xml +9 -0
- data/tests/illformed/entities/cap.xml +9 -0
- data/tests/illformed/entities/ccedil.xml +9 -0
- data/tests/illformed/entities/cedil.xml +9 -0
- data/tests/illformed/entities/cent.xml +9 -0
- data/tests/illformed/entities/chi.xml +9 -0
- data/tests/illformed/entities/circ.xml +9 -0
- data/tests/illformed/entities/clubs.xml +9 -0
- data/tests/illformed/entities/cong.xml +9 -0
- data/tests/illformed/entities/copy.xml +9 -0
- data/tests/illformed/entities/crarr.xml +9 -0
- data/tests/illformed/entities/cup.xml +9 -0
- data/tests/illformed/entities/curren.xml +9 -0
- data/tests/illformed/entities/dagger.xml +9 -0
- data/tests/illformed/entities/darr.xml +9 -0
- data/tests/illformed/entities/deg.xml +9 -0
- data/tests/illformed/entities/delta.xml +9 -0
- data/tests/illformed/entities/diams.xml +9 -0
- data/tests/illformed/entities/divide.xml +9 -0
- data/tests/illformed/entities/doesnotexist.xml +9 -0
- data/tests/illformed/entities/eacute.xml +9 -0
- data/tests/illformed/entities/ecirc.xml +9 -0
- data/tests/illformed/entities/egrave.xml +9 -0
- data/tests/illformed/entities/empty.xml +9 -0
- data/tests/illformed/entities/emsp.xml +9 -0
- data/tests/illformed/entities/ensp.xml +9 -0
- data/tests/illformed/entities/epsilon.xml +9 -0
- data/tests/illformed/entities/equiv.xml +9 -0
- data/tests/illformed/entities/eta.xml +9 -0
- data/tests/illformed/entities/eth.xml +9 -0
- data/tests/illformed/entities/euml.xml +9 -0
- data/tests/illformed/entities/euro.xml +9 -0
- data/tests/illformed/entities/exist.xml +9 -0
- data/tests/illformed/entities/fnof.xml +9 -0
- data/tests/illformed/entities/forall.xml +9 -0
- data/tests/illformed/entities/frac12.xml +9 -0
- data/tests/illformed/entities/frac14.xml +9 -0
- data/tests/illformed/entities/frac34.xml +9 -0
- data/tests/illformed/entities/frasl.xml +9 -0
- data/tests/illformed/entities/gamma.xml +9 -0
- data/tests/illformed/entities/ge.xml +9 -0
- data/tests/illformed/entities/hArr.xml +9 -0
- data/tests/illformed/entities/hearts.xml +9 -0
- data/tests/illformed/entities/hellip.xml +9 -0
- data/tests/illformed/entities/iacute.xml +9 -0
- data/tests/illformed/entities/icirc.xml +9 -0
- data/tests/illformed/entities/iexcl.xml +9 -0
- data/tests/illformed/entities/igrave.xml +9 -0
- data/tests/illformed/entities/image.xml +9 -0
- data/tests/illformed/entities/infin.xml +9 -0
- data/tests/illformed/entities/int.xml +9 -0
- data/tests/illformed/entities/iota.xml +9 -0
- data/tests/illformed/entities/iquest.xml +9 -0
- data/tests/illformed/entities/isin.xml +9 -0
- data/tests/illformed/entities/iuml.xml +9 -0
- data/tests/illformed/entities/kappa.xml +9 -0
- data/tests/illformed/entities/lArr.xml +9 -0
- data/tests/illformed/entities/lambda.xml +9 -0
- data/tests/illformed/entities/lang.xml +9 -0
- data/tests/illformed/entities/laquo.xml +9 -0
- data/tests/illformed/entities/lceil.xml +9 -0
- data/tests/illformed/entities/ldquo.xml +9 -0
- data/tests/illformed/entities/le.xml +9 -0
- data/tests/illformed/entities/lfloor.xml +9 -0
- data/tests/illformed/entities/lowast.xml +9 -0
- data/tests/illformed/entities/loz.xml +9 -0
- data/tests/illformed/entities/lrm.xml +9 -0
- data/tests/illformed/entities/lsaquo.xml +9 -0
- data/tests/illformed/entities/lsquo.xml +9 -0
- data/tests/illformed/entities/macr.xml +9 -0
- data/tests/illformed/entities/mdash.xml +9 -0
- data/tests/illformed/entities/micro.xml +9 -0
- data/tests/illformed/entities/middot.xml +9 -0
- data/tests/illformed/entities/minus.xml +9 -0
- data/tests/illformed/entities/mu.xml +9 -0
- data/tests/illformed/entities/nabla.xml +9 -0
- data/tests/illformed/entities/nbsp.xml +9 -0
- data/tests/illformed/entities/ndash.xml +9 -0
- data/tests/illformed/entities/ne.xml +9 -0
- data/tests/illformed/entities/ni.xml +9 -0
- data/tests/illformed/entities/not.xml +9 -0
- data/tests/illformed/entities/notin.xml +9 -0
- data/tests/illformed/entities/nsub.xml +9 -0
- data/tests/illformed/entities/ntilde.xml +9 -0
- data/tests/illformed/entities/nu.xml +9 -0
- data/tests/illformed/entities/oacute.xml +9 -0
- data/tests/illformed/entities/ocirc.xml +9 -0
- data/tests/illformed/entities/oelig.xml +9 -0
- data/tests/illformed/entities/ograve.xml +9 -0
- data/tests/illformed/entities/oline.xml +9 -0
- data/tests/illformed/entities/omega.xml +9 -0
- data/tests/illformed/entities/omicron.xml +9 -0
- data/tests/illformed/entities/oplus.xml +9 -0
- data/tests/illformed/entities/or.xml +9 -0
- data/tests/illformed/entities/ordf.xml +9 -0
- data/tests/illformed/entities/ordm.xml +9 -0
- data/tests/illformed/entities/oslash.xml +9 -0
- data/tests/illformed/entities/otilde.xml +9 -0
- data/tests/illformed/entities/otimes.xml +9 -0
- data/tests/illformed/entities/ouml.xml +9 -0
- data/tests/illformed/entities/para.xml +9 -0
- data/tests/illformed/entities/part.xml +9 -0
- data/tests/illformed/entities/permil.xml +9 -0
- data/tests/illformed/entities/perp.xml +9 -0
- data/tests/illformed/entities/phi.xml +9 -0
- data/tests/illformed/entities/pi.xml +9 -0
- data/tests/illformed/entities/piv.xml +9 -0
- data/tests/illformed/entities/plusmn.xml +9 -0
- data/tests/illformed/entities/pound.xml +9 -0
- data/tests/illformed/entities/prime.xml +9 -0
- data/tests/illformed/entities/prod.xml +9 -0
- data/tests/illformed/entities/prop.xml +9 -0
- data/tests/illformed/entities/psi.xml +9 -0
- data/tests/illformed/entities/radic.xml +9 -0
- data/tests/illformed/entities/rang.xml +9 -0
- data/tests/illformed/entities/raquo.xml +9 -0
- data/tests/illformed/entities/rarr.xml +9 -0
- data/tests/illformed/entities/rceil.xml +9 -0
- data/tests/illformed/entities/rdquo.xml +9 -0
- data/tests/illformed/entities/real.xml +9 -0
- data/tests/illformed/entities/reg.xml +9 -0
- data/tests/illformed/entities/rfloor.xml +9 -0
- data/tests/illformed/entities/rho.xml +9 -0
- data/tests/illformed/entities/rlm.xml +9 -0
- data/tests/illformed/entities/rsaquo.xml +9 -0
- data/tests/illformed/entities/rsquo.xml +9 -0
- data/tests/illformed/entities/sbquo.xml +9 -0
- data/tests/illformed/entities/scaron.xml +9 -0
- data/tests/illformed/entities/sdot.xml +9 -0
- data/tests/illformed/entities/sect.xml +9 -0
- data/tests/illformed/entities/shy.xml +9 -0
- data/tests/illformed/entities/sigma.xml +9 -0
- data/tests/illformed/entities/sigmaf.xml +9 -0
- data/tests/illformed/entities/sim.xml +9 -0
- data/tests/illformed/entities/spades.xml +9 -0
- data/tests/illformed/entities/sub.xml +9 -0
- data/tests/illformed/entities/sube.xml +9 -0
- data/tests/illformed/entities/sum.xml +9 -0
- data/tests/illformed/entities/sup.xml +9 -0
- data/tests/illformed/entities/sup1.xml +9 -0
- data/tests/illformed/entities/sup2.xml +9 -0
- data/tests/illformed/entities/sup3.xml +9 -0
- data/tests/illformed/entities/supe.xml +9 -0
- data/tests/illformed/entities/szlig.xml +9 -0
- data/tests/illformed/entities/tau.xml +9 -0
- data/tests/illformed/entities/there4.xml +9 -0
- data/tests/illformed/entities/theta.xml +9 -0
- data/tests/illformed/entities/thetasym.xml +9 -0
- data/tests/illformed/entities/thinsp.xml +9 -0
- data/tests/illformed/entities/thorn.xml +9 -0
- data/tests/illformed/entities/tilde.xml +9 -0
- data/tests/illformed/entities/times.xml +9 -0
- data/tests/illformed/entities/trade.xml +9 -0
- data/tests/illformed/entities/uacute.xml +9 -0
- data/tests/illformed/entities/uarr.xml +9 -0
- data/tests/illformed/entities/ucirc.xml +9 -0
- data/tests/illformed/entities/ugrave.xml +9 -0
- data/tests/illformed/entities/uml.xml +9 -0
- data/tests/illformed/entities/upper_AElig.xml +9 -0
- data/tests/illformed/entities/upper_Aacute.xml +9 -0
- data/tests/illformed/entities/upper_Acirc.xml +9 -0
- data/tests/illformed/entities/upper_Agrave.xml +9 -0
- data/tests/illformed/entities/upper_Alpha.xml +9 -0
- data/tests/illformed/entities/upper_Aring.xml +9 -0
- data/tests/illformed/entities/upper_Atilde.xml +9 -0
- data/tests/illformed/entities/upper_Auml.xml +9 -0
- data/tests/illformed/entities/upper_Beta.xml +9 -0
- data/tests/illformed/entities/upper_Ccedil.xml +9 -0
- data/tests/illformed/entities/upper_Chi.xml +9 -0
- data/tests/illformed/entities/upper_Dagger.xml +9 -0
- data/tests/illformed/entities/upper_Delta.xml +9 -0
- data/tests/illformed/entities/upper_ETH.xml +9 -0
- data/tests/illformed/entities/upper_Eacute.xml +9 -0
- data/tests/illformed/entities/upper_Ecirc.xml +9 -0
- data/tests/illformed/entities/upper_Egrave.xml +9 -0
- data/tests/illformed/entities/upper_Epsilon.xml +9 -0
- data/tests/illformed/entities/upper_Eta.xml +9 -0
- data/tests/illformed/entities/upper_Euml.xml +9 -0
- data/tests/illformed/entities/upper_Gamma.xml +9 -0
- data/tests/illformed/entities/upper_Iacute.xml +9 -0
- data/tests/illformed/entities/upper_Icirc.xml +9 -0
- data/tests/illformed/entities/upper_Igrave.xml +9 -0
- data/tests/illformed/entities/upper_Iota.xml +9 -0
- data/tests/illformed/entities/upper_Iuml.xml +9 -0
- data/tests/illformed/entities/upper_Kappa.xml +9 -0
- data/tests/illformed/entities/upper_Lambda.xml +9 -0
- data/tests/illformed/entities/upper_Mu.xml +9 -0
- data/tests/illformed/entities/upper_Ntilde.xml +9 -0
- data/tests/illformed/entities/upper_Nu.xml +9 -0
- data/tests/illformed/entities/upper_OElig.xml +9 -0
- data/tests/illformed/entities/upper_Oacute.xml +9 -0
- data/tests/illformed/entities/upper_Ocirc.xml +9 -0
- data/tests/illformed/entities/upper_Ograve.xml +9 -0
- data/tests/illformed/entities/upper_Omega.xml +9 -0
- data/tests/illformed/entities/upper_Omicron.xml +9 -0
- data/tests/illformed/entities/upper_Oslash.xml +9 -0
- data/tests/illformed/entities/upper_Otilde.xml +9 -0
- data/tests/illformed/entities/upper_Ouml.xml +9 -0
- data/tests/illformed/entities/upper_Phi.xml +9 -0
- data/tests/illformed/entities/upper_Pi.xml +9 -0
- data/tests/illformed/entities/upper_Prime.xml +9 -0
- data/tests/illformed/entities/upper_Psi.xml +9 -0
- data/tests/illformed/entities/upper_Rho.xml +9 -0
- data/tests/illformed/entities/upper_Scaron.xml +9 -0
- data/tests/illformed/entities/upper_Sigma.xml +9 -0
- data/tests/illformed/entities/upper_THORN.xml +9 -0
- data/tests/illformed/entities/upper_Tau.xml +9 -0
- data/tests/illformed/entities/upper_Theta.xml +9 -0
- data/tests/illformed/entities/upper_Uacute.xml +9 -0
- data/tests/illformed/entities/upper_Ucirc.xml +9 -0
- data/tests/illformed/entities/upper_Ugrave.xml +9 -0
- data/tests/illformed/entities/upper_Upsilon.xml +9 -0
- data/tests/illformed/entities/upper_Uuml.xml +9 -0
- data/tests/illformed/entities/upper_Xi.xml +9 -0
- data/tests/illformed/entities/upper_Yacute.xml +9 -0
- data/tests/illformed/entities/upper_Yuml.xml +9 -0
- data/tests/illformed/entities/upper_Zeta.xml +9 -0
- data/tests/illformed/entities/upsih.xml +9 -0
- data/tests/illformed/entities/upsilon.xml +9 -0
- data/tests/illformed/entities/uuml.xml +9 -0
- data/tests/illformed/entities/weierp.xml +9 -0
- data/tests/illformed/entities/xi.xml +9 -0
- data/tests/illformed/entities/yacute.xml +9 -0
- data/tests/illformed/entities/yen.xml +9 -0
- data/tests/illformed/entities/yuml.xml +9 -0
- data/tests/illformed/entities/zeta.xml +9 -0
- data/tests/illformed/entities/zwj.xml +9 -0
- data/tests/illformed/entities/zwnj.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_block.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_block_false.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_block_no.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_block_true.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_block_uppercase.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_block_whitespace.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_category.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_category_nested.xml +11 -0
- data/tests/illformed/itunes/itunes_channel_category_scheme.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_explicit.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_explicit_false.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_explicit_no.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_explicit_true.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_explicit_uppercase.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_explicit_whitespace.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_image.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_keywords.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_keywords_duplicate.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_keywords_duplicate_2.xml +10 -0
- data/tests/illformed/itunes/itunes_channel_keywords_multiple.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_link_image.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_owner_email.xml +12 -0
- data/tests/illformed/itunes/itunes_channel_owner_name.xml +12 -0
- data/tests/illformed/itunes/itunes_channel_subtitle.xml +9 -0
- data/tests/illformed/itunes/itunes_channel_summary.xml +9 -0
- data/tests/illformed/itunes/itunes_core_element_uppercase.xml +9 -0
- data/tests/illformed/itunes/itunes_enclosure_url_maps_id.xml +11 -0
- data/tests/illformed/itunes/itunes_enclosure_url_maps_id_2.xml +12 -0
- data/tests/illformed/itunes/itunes_item_author_map_author.xml +11 -0
- data/tests/illformed/itunes/itunes_item_block.xml +11 -0
- data/tests/illformed/itunes/itunes_item_block_false.xml +11 -0
- data/tests/illformed/itunes/itunes_item_block_no.xml +11 -0
- data/tests/illformed/itunes/itunes_item_block_true.xml +11 -0
- data/tests/illformed/itunes/itunes_item_block_uppercase.xml +11 -0
- data/tests/illformed/itunes/itunes_item_block_whitespace.xml +11 -0
- data/tests/illformed/itunes/itunes_item_category.xml +11 -0
- data/tests/illformed/itunes/itunes_item_category_nested.xml +13 -0
- data/tests/illformed/itunes/itunes_item_category_scheme.xml +11 -0
- data/tests/illformed/itunes/itunes_item_duration.xml +11 -0
- data/tests/illformed/itunes/itunes_item_explicit.xml +11 -0
- data/tests/illformed/itunes/itunes_item_explicit_false.xml +11 -0
- data/tests/illformed/itunes/itunes_item_explicit_no.xml +11 -0
- data/tests/illformed/itunes/itunes_item_explicit_true.xml +11 -0
- data/tests/illformed/itunes/itunes_item_explicit_uppercase.xml +11 -0
- data/tests/illformed/itunes/itunes_item_explicit_whitespace.xml +11 -0
- data/tests/illformed/itunes/itunes_item_image.xml +11 -0
- data/tests/illformed/itunes/itunes_item_link_image.xml +11 -0
- data/tests/illformed/itunes/itunes_item_subtitle.xml +11 -0
- data/tests/illformed/itunes/itunes_item_summary.xml +11 -0
- data/tests/illformed/itunes/itunes_namespace.xml +9 -0
- data/tests/illformed/itunes/itunes_namespace_example.xml +9 -0
- data/tests/illformed/itunes/itunes_namespace_lowercase.xml +9 -0
- data/tests/illformed/itunes/itunes_namespace_uppercase.xml +9 -0
- data/tests/illformed/lang/channel_dc_language.xml +9 -0
- data/tests/illformed/lang/channel_language.xml +9 -0
- data/tests/illformed/lang/entry_content_xml_lang.xml +9 -0
- data/tests/illformed/lang/entry_content_xml_lang_blank.xml +9 -0
- data/tests/illformed/lang/entry_content_xml_lang_blank_2.xml +9 -0
- data/tests/illformed/lang/entry_content_xml_lang_blank_3.xml +12 -0
- data/tests/illformed/lang/entry_content_xml_lang_inherit.xml +9 -0
- data/tests/illformed/lang/entry_content_xml_lang_inherit_2.xml +9 -0
- data/tests/illformed/lang/entry_content_xml_lang_inherit_3.xml +10 -0
- data/tests/illformed/lang/entry_content_xml_lang_inherit_4.xml +10 -0
- data/tests/illformed/lang/entry_summary_xml_lang.xml +9 -0
- data/tests/illformed/lang/entry_summary_xml_lang_blank.xml +9 -0
- data/tests/illformed/lang/entry_summary_xml_lang_inherit.xml +9 -0
- data/tests/illformed/lang/entry_summary_xml_lang_inherit_2.xml +9 -0
- data/tests/illformed/lang/entry_summary_xml_lang_inherit_3.xml +10 -0
- data/tests/illformed/lang/entry_summary_xml_lang_inherit_4.xml +10 -0
- data/tests/illformed/lang/entry_title_xml_lang.xml +9 -0
- data/tests/illformed/lang/entry_title_xml_lang_blank.xml +9 -0
- data/tests/illformed/lang/entry_title_xml_lang_inherit.xml +9 -0
- data/tests/illformed/lang/entry_title_xml_lang_inherit_2.xml +9 -0
- data/tests/illformed/lang/entry_title_xml_lang_inherit_3.xml +10 -0
- data/tests/illformed/lang/entry_title_xml_lang_inherit_4.xml +10 -0
- data/tests/illformed/lang/feed_copyright_xml_lang.xml +7 -0
- data/tests/illformed/lang/feed_copyright_xml_lang_blank.xml +7 -0
- data/tests/illformed/lang/feed_copyright_xml_lang_inherit.xml +7 -0
- data/tests/illformed/lang/feed_copyright_xml_lang_inherit_2.xml +7 -0
- data/tests/illformed/lang/feed_copyright_xml_lang_inherit_3.xml +8 -0
- data/tests/illformed/lang/feed_copyright_xml_lang_inherit_4.xml +8 -0
- data/tests/illformed/lang/feed_info_xml_lang.xml +7 -0
- data/tests/illformed/lang/feed_info_xml_lang_blank.xml +7 -0
- data/tests/illformed/lang/feed_info_xml_lang_inherit.xml +7 -0
- data/tests/illformed/lang/feed_info_xml_lang_inherit_2.xml +7 -0
- data/tests/illformed/lang/feed_info_xml_lang_inherit_3.xml +8 -0
- data/tests/illformed/lang/feed_info_xml_lang_inherit_4.xml +8 -0
- data/tests/illformed/lang/feed_language.xml +9 -0
- data/tests/illformed/lang/feed_language_override.xml +9 -0
- data/tests/illformed/lang/feed_not_xml_lang.xml +7 -0
- data/tests/illformed/lang/feed_not_xml_lang_2.xml +7 -0
- data/tests/illformed/lang/feed_tagline_xml_lang.xml +7 -0
- data/tests/illformed/lang/feed_tagline_xml_lang_blank.xml +7 -0
- data/tests/illformed/lang/feed_tagline_xml_lang_inherit.xml +7 -0
- data/tests/illformed/lang/feed_tagline_xml_lang_inherit_2.xml +7 -0
- data/tests/illformed/lang/feed_tagline_xml_lang_inherit_3.xml +8 -0
- data/tests/illformed/lang/feed_tagline_xml_lang_inherit_4.xml +8 -0
- data/tests/illformed/lang/feed_title_xml_lang.xml +7 -0
- data/tests/illformed/lang/feed_title_xml_lang_blank.xml +7 -0
- data/tests/illformed/lang/feed_title_xml_lang_inherit.xml +7 -0
- data/tests/illformed/lang/feed_title_xml_lang_inherit_2.xml +7 -0
- data/tests/illformed/lang/feed_title_xml_lang_inherit_3.xml +8 -0
- data/tests/illformed/lang/feed_title_xml_lang_inherit_4.xml +8 -0
- data/tests/illformed/lang/feed_xml_lang.xml +6 -0
- data/tests/illformed/lang/http_content_language.xml +7 -0
- data/tests/illformed/lang/http_content_language_entry_title_inherit.xml +10 -0
- data/tests/illformed/lang/http_content_language_entry_title_inherit_2.xml +11 -0
- data/tests/illformed/lang/http_content_language_feed_language.xml +10 -0
- data/tests/illformed/lang/http_content_language_feed_xml_lang.xml +7 -0
- data/tests/illformed/lang/item_content_encoded_xml_lang.xml +11 -0
- data/tests/illformed/lang/item_content_encoded_xml_lang_inherit.xml +11 -0
- data/tests/illformed/lang/item_dc_language.xml +11 -0
- data/tests/illformed/lang/item_fullitem_xml_lang.xml +11 -0
- data/tests/illformed/lang/item_fullitem_xml_lang_inherit.xml +11 -0
- data/tests/illformed/lang/item_xhtml_body_xml_lang.xml +13 -0
- data/tests/illformed/lang/item_xhtml_body_xml_lang_inherit.xml +13 -0
- data/tests/illformed/namespace/rss1.0withModules.xml +47 -0
- data/tests/illformed/namespace/rss1.0withModulesNoDefNS.xml +48 -0
- data/tests/illformed/namespace/rss1.0withModulesNoDefNSLocalNameClash.xml +53 -0
- data/tests/illformed/namespace/rss2.0NSwithModules.xml +50 -0
- data/tests/illformed/namespace/rss2.0NSwithModulesNoDefNS.xml +50 -0
- data/tests/illformed/namespace/rss2.0NSwithModulesNoDefNSLocalNameClash.xml +58 -0
- data/tests/illformed/namespace/rss2.0noNSwithModules.xml +49 -0
- data/tests/illformed/namespace/rss2.0noNSwithModulesLocalNameClash.xml +57 -0
- data/tests/illformed/namespace/undeclared_namespace.xml +10 -0
- data/tests/illformed/rdf/rdf_channel_description.xml +9 -0
- data/tests/illformed/rdf/rdf_channel_empty_textinput.xml +26 -0
- data/tests/illformed/rdf/rdf_channel_link.xml +9 -0
- data/tests/illformed/rdf/rdf_channel_title.xml +9 -0
- data/tests/illformed/rdf/rdf_item_description.xml +16 -0
- data/tests/illformed/rdf/rdf_item_link.xml +16 -0
- data/tests/illformed/rdf/rdf_item_rdf_about.xml +15 -0
- data/tests/illformed/rdf/rdf_item_title.xml +16 -0
- data/tests/illformed/rdf/rss090_channel_title.xml +12 -0
- data/tests/illformed/rdf/rss090_item_title.xml +12 -0
- data/tests/illformed/rdf/rss_version_10.xml +6 -0
- data/tests/illformed/rdf/rss_version_10_not_default_ns.xml +8 -0
- data/tests/illformed/rss/aaa_illformed.xml +6 -0
- data/tests/illformed/rss/channel_author.xml +9 -0
- data/tests/illformed/rss/channel_author_map_author_detail_email.xml +9 -0
- data/tests/illformed/rss/channel_author_map_author_detail_email_2.xml +9 -0
- data/tests/illformed/rss/channel_author_map_author_detail_email_3.xml +9 -0
- data/tests/illformed/rss/channel_author_map_author_detail_name.xml +9 -0
- data/tests/illformed/rss/channel_author_map_author_detail_name_2.xml +9 -0
- data/tests/illformed/rss/channel_category.xml +9 -0
- data/tests/illformed/rss/channel_category_domain.xml +9 -0
- data/tests/illformed/rss/channel_category_multiple.xml +10 -0
- data/tests/illformed/rss/channel_category_multiple_2.xml +10 -0
- data/tests/illformed/rss/channel_cloud_domain.xml +9 -0
- data/tests/illformed/rss/channel_cloud_path.xml +9 -0
- data/tests/illformed/rss/channel_cloud_port.xml +9 -0
- data/tests/illformed/rss/channel_cloud_protocol.xml +9 -0
- data/tests/illformed/rss/channel_cloud_registerProcedure.xml +9 -0
- data/tests/illformed/rss/channel_copyright.xml +9 -0
- data/tests/illformed/rss/channel_dc_author.xml +9 -0
- data/tests/illformed/rss/channel_dc_author_map_author_detail_email.xml +9 -0
- data/tests/illformed/rss/channel_dc_author_map_author_detail_name.xml +9 -0
- data/tests/illformed/rss/channel_dc_contributor.xml +9 -0
- data/tests/illformed/rss/channel_dc_creator.xml +9 -0
- data/tests/illformed/rss/channel_dc_creator_map_author_detail_email.xml +9 -0
- data/tests/illformed/rss/channel_dc_creator_map_author_detail_name.xml +9 -0
- data/tests/illformed/rss/channel_dc_publisher.xml +9 -0
- data/tests/illformed/rss/channel_dc_publisher_email.xml +9 -0
- data/tests/illformed/rss/channel_dc_publisher_name.xml +9 -0
- data/tests/illformed/rss/channel_dc_rights.xml +9 -0
- data/tests/illformed/rss/channel_dc_subject.xml +9 -0
- data/tests/illformed/rss/channel_dc_subject_2.xml +9 -0
- data/tests/illformed/rss/channel_dc_subject_multiple.xml +10 -0
- data/tests/illformed/rss/channel_dc_title.xml +9 -0
- data/tests/illformed/rss/channel_description.xml +9 -0
- data/tests/illformed/rss/channel_description_escaped_markup.xml +9 -0
- data/tests/illformed/rss/channel_description_map_tagline.xml +9 -0
- data/tests/illformed/rss/channel_description_naked_markup.xml +9 -0
- data/tests/illformed/rss/channel_description_shorttag.xml +10 -0
- data/tests/illformed/rss/channel_docs.xml +9 -0
- data/tests/illformed/rss/channel_generator.xml +9 -0
- data/tests/illformed/rss/channel_image_description.xml +16 -0
- data/tests/illformed/rss/channel_image_height.xml +16 -0
- data/tests/illformed/rss/channel_image_link.xml +16 -0
- data/tests/illformed/rss/channel_image_link_conflict.xml +12 -0
- data/tests/illformed/rss/channel_image_title.xml +16 -0
- data/tests/illformed/rss/channel_image_title_conflict.xml +12 -0
- data/tests/illformed/rss/channel_image_url.xml +16 -0
- data/tests/illformed/rss/channel_image_width.xml +16 -0
- data/tests/illformed/rss/channel_link.xml +9 -0
- data/tests/illformed/rss/channel_managingEditor.xml +9 -0
- data/tests/illformed/rss/channel_managingEditor_map_author_detail_email.xml +9 -0
- data/tests/illformed/rss/channel_managingEditor_map_author_detail_name.xml +9 -0
- data/tests/illformed/rss/channel_textInput_description.xml +14 -0
- data/tests/illformed/rss/channel_textInput_description_conflict.xml +12 -0
- data/tests/illformed/rss/channel_textInput_link.xml +12 -0
- data/tests/illformed/rss/channel_textInput_link_conflict.xml +12 -0
- data/tests/illformed/rss/channel_textInput_name.xml +11 -0
- data/tests/illformed/rss/channel_textInput_title.xml +12 -0
- data/tests/illformed/rss/channel_textInput_title_conflict.xml +12 -0
- data/tests/illformed/rss/channel_title.xml +9 -0
- data/tests/illformed/rss/channel_title_apos.xml +9 -0
- data/tests/illformed/rss/channel_title_gt.xml +9 -0
- data/tests/illformed/rss/channel_title_lt.xml +9 -0
- data/tests/illformed/rss/channel_ttl.xml +9 -0
- data/tests/illformed/rss/channel_webMaster.xml +9 -0
- data/tests/illformed/rss/channel_webMaster_email.xml +9 -0
- data/tests/illformed/rss/channel_webMaster_name.xml +9 -0
- data/tests/illformed/rss/item_author.xml +11 -0
- data/tests/illformed/rss/item_author_map_author_detail_email.xml +11 -0
- data/tests/illformed/rss/item_author_map_author_detail_name.xml +11 -0
- data/tests/illformed/rss/item_category.xml +11 -0
- data/tests/illformed/rss/item_category_domain.xml +11 -0
- data/tests/illformed/rss/item_category_multiple.xml +12 -0
- data/tests/illformed/rss/item_category_multiple_2.xml +12 -0
- data/tests/illformed/rss/item_comments.xml +11 -0
- data/tests/illformed/rss/item_content_encoded.xml +11 -0
- data/tests/illformed/rss/item_content_encoded_mode.xml +11 -0
- data/tests/illformed/rss/item_content_encoded_type.xml +11 -0
- data/tests/illformed/rss/item_dc_author.xml +11 -0
- data/tests/illformed/rss/item_dc_author_map_author_detail_email.xml +11 -0
- data/tests/illformed/rss/item_dc_author_map_author_detail_name.xml +11 -0
- data/tests/illformed/rss/item_dc_contributor.xml +11 -0
- data/tests/illformed/rss/item_dc_creator.xml +11 -0
- data/tests/illformed/rss/item_dc_creator_map_author_detail_email.xml +11 -0
- data/tests/illformed/rss/item_dc_creator_map_author_detail_name.xml +11 -0
- data/tests/illformed/rss/item_dc_publisher.xml +11 -0
- data/tests/illformed/rss/item_dc_publisher_email.xml +11 -0
- data/tests/illformed/rss/item_dc_publisher_name.xml +11 -0
- data/tests/illformed/rss/item_dc_rights.xml +11 -0
- data/tests/illformed/rss/item_dc_subject.xml +11 -0
- data/tests/illformed/rss/item_dc_subject_2.xml +11 -0
- data/tests/illformed/rss/item_dc_subject_multiple.xml +12 -0
- data/tests/illformed/rss/item_dc_title.xml +11 -0
- data/tests/illformed/rss/item_description.xml +11 -0
- data/tests/illformed/rss/item_description_and_summary.xml +12 -0
- data/tests/illformed/rss/item_description_br.xml +11 -0
- data/tests/illformed/rss/item_description_br_shorttag.xml +12 -0
- data/tests/illformed/rss/item_description_escaped_markup.xml +11 -0
- data/tests/illformed/rss/item_description_map_summary.xml +11 -0
- data/tests/illformed/rss/item_description_naked_markup.xml +11 -0
- data/tests/illformed/rss/item_description_not_a_doctype.xml +9 -0
- data/tests/illformed/rss/item_enclosure_length.xml +12 -0
- data/tests/illformed/rss/item_enclosure_multiple.xml +13 -0
- data/tests/illformed/rss/item_enclosure_type.xml +12 -0
- data/tests/illformed/rss/item_enclosure_url.xml +12 -0
- data/tests/illformed/rss/item_fullitem.xml +11 -0
- data/tests/illformed/rss/item_fullitem_mode.xml +11 -0
- data/tests/illformed/rss/item_fullitem_type.xml +11 -0
- data/tests/illformed/rss/item_guid.xml +11 -0
- data/tests/illformed/rss/item_guid_conflict_link.xml +12 -0
- data/tests/illformed/rss/item_guid_guidislink.xml +11 -0
- data/tests/illformed/rss/item_guid_isPermaLink_conflict_link.xml +12 -0
- data/tests/illformed/rss/item_guid_isPermaLink_conflict_link_not_guidislink.xml +12 -0
- data/tests/illformed/rss/item_guid_isPermaLink_guidislink.xml +11 -0
- data/tests/illformed/rss/item_guid_isPermaLink_map_link.xml +11 -0
- data/tests/illformed/rss/item_guid_map_link.xml +11 -0
- data/tests/illformed/rss/item_guid_not_permalink.xml +11 -0
- data/tests/illformed/rss/item_guid_not_permalink_conflict_link.xml +12 -0
- data/tests/illformed/rss/item_guid_not_permalink_not_guidislink.xml +11 -0
- data/tests/illformed/rss/item_guid_not_permalink_not_guidislink_2.xml +12 -0
- data/tests/illformed/rss/item_link.xml +11 -0
- data/tests/illformed/rss/item_source.xml +12 -0
- data/tests/illformed/rss/item_source_url.xml +12 -0
- data/tests/illformed/rss/item_summary_and_description.xml +12 -0
- data/tests/illformed/rss/item_title.xml +11 -0
- data/tests/illformed/rss/item_xhtml_body.xml +13 -0
- data/tests/illformed/rss/item_xhtml_body_mode.xml +13 -0
- data/tests/illformed/rss/item_xhtml_body_type.xml +13 -0
- data/tests/illformed/rss/rss_namespace_1.xml +9 -0
- data/tests/illformed/rss/rss_namespace_2.xml +9 -0
- data/tests/illformed/rss/rss_namespace_3.xml +9 -0
- data/tests/illformed/rss/rss_namespace_4.xml +9 -0
- data/tests/illformed/rss/rss_version_090.xml +6 -0
- data/tests/illformed/rss/rss_version_091_netscape.xml +7 -0
- data/tests/illformed/rss/rss_version_092.xml +6 -0
- data/tests/illformed/rss/rss_version_093.xml +6 -0
- data/tests/illformed/rss/rss_version_094.xml +6 -0
- data/tests/illformed/rss/rss_version_20.xml +6 -0
- data/tests/illformed/rss/rss_version_201.xml +6 -0
- data/tests/illformed/rss/rss_version_21.xml +6 -0
- data/tests/illformed/rss/rss_version_missing.xml +9 -0
- data/tests/illformed/sanitize/entry_content_applet.xml +9 -0
- data/tests/illformed/sanitize/entry_content_blink.xml +9 -0
- data/tests/illformed/sanitize/entry_content_crazy.xml +75 -0
- data/tests/illformed/sanitize/entry_content_embed.xml +9 -0
- data/tests/illformed/sanitize/entry_content_frame.xml +9 -0
- data/tests/illformed/sanitize/entry_content_iframe.xml +9 -0
- data/tests/illformed/sanitize/entry_content_link.xml +9 -0
- data/tests/illformed/sanitize/entry_content_meta.xml +9 -0
- data/tests/illformed/sanitize/entry_content_object.xml +9 -0
- data/tests/illformed/sanitize/entry_content_onabort.xml +9 -0
- data/tests/illformed/sanitize/entry_content_onblur.xml +9 -0
- data/tests/illformed/sanitize/entry_content_onchange.xml +9 -0
- data/tests/illformed/sanitize/entry_content_onclick.xml +9 -0
- data/tests/illformed/sanitize/entry_content_ondblclick.xml +9 -0
- data/tests/illformed/sanitize/entry_content_onerror.xml +9 -0
- data/tests/illformed/sanitize/entry_content_onfocus.xml +9 -0
- data/tests/illformed/sanitize/entry_content_onkeydown.xml +9 -0
- data/tests/illformed/sanitize/entry_content_onkeypress.xml +9 -0
- data/tests/illformed/sanitize/entry_content_onkeyup.xml +9 -0
- data/tests/illformed/sanitize/entry_content_onload.xml +9 -0
- data/tests/illformed/sanitize/entry_content_onmousedown.xml +9 -0
- data/tests/illformed/sanitize/entry_content_onmouseout.xml +9 -0
- data/tests/illformed/sanitize/entry_content_onmouseover.xml +9 -0
- data/tests/illformed/sanitize/entry_content_onmouseup.xml +9 -0
- data/tests/illformed/sanitize/entry_content_onreset.xml +9 -0
- data/tests/illformed/sanitize/entry_content_onresize.xml +9 -0
- data/tests/illformed/sanitize/entry_content_onsubmit.xml +9 -0
- data/tests/illformed/sanitize/entry_content_onunload.xml +9 -0
- data/tests/illformed/sanitize/entry_content_script.xml +9 -0
- data/tests/illformed/sanitize/entry_content_script_base64.xml +12 -0
- data/tests/illformed/sanitize/entry_content_script_cdata.xml +9 -0
- data/tests/illformed/sanitize/entry_content_script_inline.xml +9 -0
- data/tests/illformed/sanitize/entry_content_style.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_applet.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_blink.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_crazy.xml +75 -0
- data/tests/illformed/sanitize/entry_summary_embed.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_frame.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_iframe.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_link.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_meta.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_object.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_onabort.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_onblur.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_onchange.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_onclick.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_ondblclick.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_onerror.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_onfocus.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_onkeydown.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_onkeypress.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_onkeyup.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_onload.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_onmousedown.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_onmouseout.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_onmouseover.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_onmouseup.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_onreset.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_onresize.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_onsubmit.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_onunload.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_script.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_script_base64.xml +12 -0
- data/tests/illformed/sanitize/entry_summary_script_cdata.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_script_inline.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_script_map_description.xml +9 -0
- data/tests/illformed/sanitize/entry_summary_style.xml +9 -0
- data/tests/illformed/sanitize/entry_title_applet.xml +9 -0
- data/tests/illformed/sanitize/entry_title_blink.xml +9 -0
- data/tests/illformed/sanitize/entry_title_crazy.xml +75 -0
- data/tests/illformed/sanitize/entry_title_embed.xml +9 -0
- data/tests/illformed/sanitize/entry_title_frame.xml +9 -0
- data/tests/illformed/sanitize/entry_title_iframe.xml +9 -0
- data/tests/illformed/sanitize/entry_title_link.xml +9 -0
- data/tests/illformed/sanitize/entry_title_meta.xml +9 -0
- data/tests/illformed/sanitize/entry_title_object.xml +9 -0
- data/tests/illformed/sanitize/entry_title_onabort.xml +9 -0
- data/tests/illformed/sanitize/entry_title_onblur.xml +9 -0
- data/tests/illformed/sanitize/entry_title_onchange.xml +9 -0
- data/tests/illformed/sanitize/entry_title_onclick.xml +9 -0
- data/tests/illformed/sanitize/entry_title_ondblclick.xml +9 -0
- data/tests/illformed/sanitize/entry_title_onerror.xml +9 -0
- data/tests/illformed/sanitize/entry_title_onfocus.xml +9 -0
- data/tests/illformed/sanitize/entry_title_onkeydown.xml +9 -0
- data/tests/illformed/sanitize/entry_title_onkeypress.xml +9 -0
- data/tests/illformed/sanitize/entry_title_onkeyup.xml +9 -0
- data/tests/illformed/sanitize/entry_title_onload.xml +9 -0
- data/tests/illformed/sanitize/entry_title_onmousedown.xml +9 -0
- data/tests/illformed/sanitize/entry_title_onmouseout.xml +9 -0
- data/tests/illformed/sanitize/entry_title_onmouseover.xml +9 -0
- data/tests/illformed/sanitize/entry_title_onmouseup.xml +9 -0
- data/tests/illformed/sanitize/entry_title_onreset.xml +9 -0
- data/tests/illformed/sanitize/entry_title_onresize.xml +9 -0
- data/tests/illformed/sanitize/entry_title_onsubmit.xml +9 -0
- data/tests/illformed/sanitize/entry_title_onunload.xml +9 -0
- data/tests/illformed/sanitize/entry_title_script.xml +9 -0
- data/tests/illformed/sanitize/entry_title_script_cdata.xml +9 -0
- data/tests/illformed/sanitize/entry_title_script_inline.xml +9 -0
- data/tests/illformed/sanitize/entry_title_style.xml +9 -0
- data/tests/illformed/sanitize/feed_copyright_applet.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_blink.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_crazy.xml +73 -0
- data/tests/illformed/sanitize/feed_copyright_embed.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_frame.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_iframe.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_link.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_meta.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_object.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_onabort.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_onblur.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_onchange.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_onclick.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_ondblclick.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_onerror.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_onfocus.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_onkeydown.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_onkeypress.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_onkeyup.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_onload.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_onmousedown.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_onmouseout.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_onmouseover.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_onmouseup.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_onreset.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_onresize.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_onsubmit.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_onunload.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_script.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_script_cdata.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_script_inline.xml +7 -0
- data/tests/illformed/sanitize/feed_copyright_style.xml +7 -0
- data/tests/illformed/sanitize/feed_info_applet.xml +7 -0
- data/tests/illformed/sanitize/feed_info_blink.xml +7 -0
- data/tests/illformed/sanitize/feed_info_crazy.xml +73 -0
- data/tests/illformed/sanitize/feed_info_embed.xml +7 -0
- data/tests/illformed/sanitize/feed_info_frame.xml +7 -0
- data/tests/illformed/sanitize/feed_info_iframe.xml +7 -0
- data/tests/illformed/sanitize/feed_info_link.xml +7 -0
- data/tests/illformed/sanitize/feed_info_meta.xml +7 -0
- data/tests/illformed/sanitize/feed_info_object.xml +7 -0
- data/tests/illformed/sanitize/feed_info_onabort.xml +7 -0
- data/tests/illformed/sanitize/feed_info_onblur.xml +7 -0
- data/tests/illformed/sanitize/feed_info_onchange.xml +7 -0
- data/tests/illformed/sanitize/feed_info_onclick.xml +7 -0
- data/tests/illformed/sanitize/feed_info_ondblclick.xml +7 -0
- data/tests/illformed/sanitize/feed_info_onerror.xml +7 -0
- data/tests/illformed/sanitize/feed_info_onfocus.xml +7 -0
- data/tests/illformed/sanitize/feed_info_onkeydown.xml +7 -0
- data/tests/illformed/sanitize/feed_info_onkeypress.xml +7 -0
- data/tests/illformed/sanitize/feed_info_onkeyup.xml +7 -0
- data/tests/illformed/sanitize/feed_info_onload.xml +7 -0
- data/tests/illformed/sanitize/feed_info_onmousedown.xml +7 -0
- data/tests/illformed/sanitize/feed_info_onmouseout.xml +7 -0
- data/tests/illformed/sanitize/feed_info_onmouseover.xml +7 -0
- data/tests/illformed/sanitize/feed_info_onmouseup.xml +7 -0
- data/tests/illformed/sanitize/feed_info_onreset.xml +7 -0
- data/tests/illformed/sanitize/feed_info_onresize.xml +7 -0
- data/tests/illformed/sanitize/feed_info_onsubmit.xml +7 -0
- data/tests/illformed/sanitize/feed_info_onunload.xml +7 -0
- data/tests/illformed/sanitize/feed_info_script.xml +7 -0
- data/tests/illformed/sanitize/feed_info_script_cdata.xml +7 -0
- data/tests/illformed/sanitize/feed_info_script_inline.xml +7 -0
- data/tests/illformed/sanitize/feed_info_style.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_applet.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_blink.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_crazy.xml +73 -0
- data/tests/illformed/sanitize/feed_subtitle_embed.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_frame.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_iframe.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_link.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_meta.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_object.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_onabort.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_onblur.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_onchange.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_onclick.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_ondblclick.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_onerror.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_onfocus.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_onkeydown.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_onkeypress.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_onkeyup.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_onload.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_onmousedown.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_onmouseout.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_onmouseover.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_onmouseup.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_onreset.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_onresize.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_onsubmit.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_onunload.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_script.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_script_cdata.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_script_inline.xml +7 -0
- data/tests/illformed/sanitize/feed_subtitle_style.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_applet.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_blink.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_crazy.xml +73 -0
- data/tests/illformed/sanitize/feed_tagline_embed.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_frame.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_iframe.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_link.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_meta.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_object.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_onabort.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_onblur.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_onchange.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_onclick.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_ondblclick.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_onerror.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_onfocus.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_onkeydown.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_onkeypress.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_onkeyup.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_onload.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_onmousedown.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_onmouseout.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_onmouseover.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_onmouseup.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_onreset.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_onresize.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_onsubmit.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_onunload.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_script.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_script_cdata.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_script_inline.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_script_map_description.xml +7 -0
- data/tests/illformed/sanitize/feed_tagline_style.xml +7 -0
- data/tests/illformed/sanitize/feed_title_applet.xml +7 -0
- data/tests/illformed/sanitize/feed_title_blink.xml +7 -0
- data/tests/illformed/sanitize/feed_title_crazy.xml +73 -0
- data/tests/illformed/sanitize/feed_title_embed.xml +7 -0
- data/tests/illformed/sanitize/feed_title_frame.xml +7 -0
- data/tests/illformed/sanitize/feed_title_iframe.xml +7 -0
- data/tests/illformed/sanitize/feed_title_link.xml +7 -0
- data/tests/illformed/sanitize/feed_title_meta.xml +7 -0
- data/tests/illformed/sanitize/feed_title_object.xml +7 -0
- data/tests/illformed/sanitize/feed_title_onabort.xml +7 -0
- data/tests/illformed/sanitize/feed_title_onblur.xml +7 -0
- data/tests/illformed/sanitize/feed_title_onchange.xml +7 -0
- data/tests/illformed/sanitize/feed_title_onclick.xml +7 -0
- data/tests/illformed/sanitize/feed_title_ondblclick.xml +7 -0
- data/tests/illformed/sanitize/feed_title_onerror.xml +7 -0
- data/tests/illformed/sanitize/feed_title_onfocus.xml +7 -0
- data/tests/illformed/sanitize/feed_title_onkeydown.xml +7 -0
- data/tests/illformed/sanitize/feed_title_onkeypress.xml +7 -0
- data/tests/illformed/sanitize/feed_title_onkeyup.xml +7 -0
- data/tests/illformed/sanitize/feed_title_onload.xml +7 -0
- data/tests/illformed/sanitize/feed_title_onmousedown.xml +7 -0
- data/tests/illformed/sanitize/feed_title_onmouseout.xml +7 -0
- data/tests/illformed/sanitize/feed_title_onmouseover.xml +7 -0
- data/tests/illformed/sanitize/feed_title_onmouseup.xml +7 -0
- data/tests/illformed/sanitize/feed_title_onreset.xml +7 -0
- data/tests/illformed/sanitize/feed_title_onresize.xml +7 -0
- data/tests/illformed/sanitize/feed_title_onsubmit.xml +7 -0
- data/tests/illformed/sanitize/feed_title_onunload.xml +7 -0
- data/tests/illformed/sanitize/feed_title_script.xml +7 -0
- data/tests/illformed/sanitize/feed_title_script_cdata.xml +7 -0
- data/tests/illformed/sanitize/feed_title_script_inline.xml +7 -0
- data/tests/illformed/sanitize/feed_title_style.xml +7 -0
- data/tests/illformed/sanitize/item_body_applet.xml +11 -0
- data/tests/illformed/sanitize/item_body_blink.xml +11 -0
- data/tests/illformed/sanitize/item_body_embed.xml +11 -0
- data/tests/illformed/sanitize/item_body_frame.xml +11 -0
- data/tests/illformed/sanitize/item_body_iframe.xml +11 -0
- data/tests/illformed/sanitize/item_body_link.xml +11 -0
- data/tests/illformed/sanitize/item_body_meta.xml +11 -0
- data/tests/illformed/sanitize/item_body_object.xml +11 -0
- data/tests/illformed/sanitize/item_body_onabort.xml +11 -0
- data/tests/illformed/sanitize/item_body_onblur.xml +11 -0
- data/tests/illformed/sanitize/item_body_onchange.xml +11 -0
- data/tests/illformed/sanitize/item_body_onclick.xml +11 -0
- data/tests/illformed/sanitize/item_body_ondblclick.xml +11 -0
- data/tests/illformed/sanitize/item_body_onerror.xml +11 -0
- data/tests/illformed/sanitize/item_body_onfocus.xml +11 -0
- data/tests/illformed/sanitize/item_body_onkeydown.xml +11 -0
- data/tests/illformed/sanitize/item_body_onkeypress.xml +11 -0
- data/tests/illformed/sanitize/item_body_onkeyup.xml +11 -0
- data/tests/illformed/sanitize/item_body_onload.xml +11 -0
- data/tests/illformed/sanitize/item_body_onmousedown.xml +11 -0
- data/tests/illformed/sanitize/item_body_onmouseout.xml +11 -0
- data/tests/illformed/sanitize/item_body_onmouseover.xml +11 -0
- data/tests/illformed/sanitize/item_body_onmouseup.xml +11 -0
- data/tests/illformed/sanitize/item_body_onreset.xml +11 -0
- data/tests/illformed/sanitize/item_body_onresize.xml +11 -0
- data/tests/illformed/sanitize/item_body_onsubmit.xml +11 -0
- data/tests/illformed/sanitize/item_body_onunload.xml +11 -0
- data/tests/illformed/sanitize/item_body_script.xml +11 -0
- data/tests/illformed/sanitize/item_body_script_map_content.xml +11 -0
- data/tests/illformed/sanitize/item_body_style.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_applet.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_blink.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_crazy.xml +77 -0
- data/tests/illformed/sanitize/item_content_encoded_embed.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_frame.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_iframe.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_link.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_map_content.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_meta.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_object.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_onabort.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_onblur.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_onchange.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_onclick.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_ondblclick.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_onerror.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_onfocus.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_onkeydown.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_onkeypress.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_onkeyup.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_onload.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_onmousedown.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_onmouseout.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_onmouseover.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_onmouseup.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_onreset.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_onresize.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_onsubmit.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_onunload.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_script.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_script_cdata.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_script_map_content.xml +11 -0
- data/tests/illformed/sanitize/item_content_encoded_style.xml +11 -0
- data/tests/illformed/sanitize/item_description_applet.xml +11 -0
- data/tests/illformed/sanitize/item_description_blink.xml +11 -0
- data/tests/illformed/sanitize/item_description_crazy.xml +81 -0
- data/tests/illformed/sanitize/item_description_embed.xml +11 -0
- data/tests/illformed/sanitize/item_description_frame.xml +11 -0
- data/tests/illformed/sanitize/item_description_iframe.xml +11 -0
- data/tests/illformed/sanitize/item_description_link.xml +11 -0
- data/tests/illformed/sanitize/item_description_meta.xml +11 -0
- data/tests/illformed/sanitize/item_description_object.xml +11 -0
- data/tests/illformed/sanitize/item_description_onabort.xml +11 -0
- data/tests/illformed/sanitize/item_description_onblur.xml +11 -0
- data/tests/illformed/sanitize/item_description_onchange.xml +11 -0
- data/tests/illformed/sanitize/item_description_onclick.xml +11 -0
- data/tests/illformed/sanitize/item_description_ondblclick.xml +11 -0
- data/tests/illformed/sanitize/item_description_onerror.xml +11 -0
- data/tests/illformed/sanitize/item_description_onfocus.xml +11 -0
- data/tests/illformed/sanitize/item_description_onkeydown.xml +11 -0
- data/tests/illformed/sanitize/item_description_onkeypress.xml +11 -0
- data/tests/illformed/sanitize/item_description_onkeyup.xml +11 -0
- data/tests/illformed/sanitize/item_description_onload.xml +11 -0
- data/tests/illformed/sanitize/item_description_onmousedown.xml +11 -0
- data/tests/illformed/sanitize/item_description_onmouseout.xml +11 -0
- data/tests/illformed/sanitize/item_description_onmouseover.xml +11 -0
- data/tests/illformed/sanitize/item_description_onmouseup.xml +11 -0
- data/tests/illformed/sanitize/item_description_onreset.xml +11 -0
- data/tests/illformed/sanitize/item_description_onresize.xml +11 -0
- data/tests/illformed/sanitize/item_description_onsubmit.xml +11 -0
- data/tests/illformed/sanitize/item_description_onunload.xml +11 -0
- data/tests/illformed/sanitize/item_description_script.xml +11 -0
- data/tests/illformed/sanitize/item_description_script_cdata.xml +11 -0
- data/tests/illformed/sanitize/item_description_script_map_summary.xml +11 -0
- data/tests/illformed/sanitize/item_description_style.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_applet.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_blink.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_crazy.xml +77 -0
- data/tests/illformed/sanitize/item_fullitem_embed.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_frame.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_iframe.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_link.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_meta.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_object.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_onabort.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_onblur.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_onchange.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_onclick.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_ondblclick.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_onerror.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_onfocus.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_onkeydown.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_onkeypress.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_onkeyup.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_onload.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_onmousedown.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_onmouseout.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_onmouseover.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_onmouseup.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_onreset.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_onresize.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_onsubmit.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_onunload.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_script.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_script_cdata.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_script_map_summary.xml +11 -0
- data/tests/illformed/sanitize/item_fullitem_style.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_applet.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_blink.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_embed.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_frame.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_iframe.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_link.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_meta.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_object.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_onabort.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_onblur.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_onchange.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_onclick.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_ondblclick.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_onerror.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_onfocus.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_onkeydown.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_onkeypress.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_onkeyup.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_onload.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_onmousedown.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_onmouseout.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_onmouseover.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_onmouseup.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_onreset.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_onresize.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_onsubmit.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_onunload.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_script.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_script_map_content.xml +11 -0
- data/tests/illformed/sanitize/item_xhtml_body_style.xml +11 -0
- data/tests/wellformed/amp/amp01.xml +9 -0
- data/tests/wellformed/amp/amp02.xml +9 -0
- data/tests/wellformed/amp/amp03.xml +9 -0
- data/tests/wellformed/amp/amp04.xml +9 -0
- data/tests/wellformed/amp/amp05.xml +9 -0
- data/tests/wellformed/amp/amp06.xml +9 -0
- data/tests/wellformed/amp/amp07.xml +9 -0
- data/tests/wellformed/amp/amp08.xml +9 -0
- data/tests/wellformed/amp/amp09.xml +9 -0
- data/tests/wellformed/amp/amp10.xml +9 -0
- data/tests/wellformed/amp/amp11.xml +9 -0
- data/tests/wellformed/amp/amp12.xml +9 -0
- data/tests/wellformed/amp/amp13.xml +9 -0
- data/tests/wellformed/amp/amp14.xml +9 -0
- data/tests/wellformed/amp/amp15.xml +9 -0
- data/tests/wellformed/amp/amp16.xml +9 -0
- data/tests/wellformed/amp/amp17.xml +9 -0
- data/tests/wellformed/amp/amp18.xml +9 -0
- data/tests/wellformed/amp/amp19.xml +9 -0
- data/tests/wellformed/amp/amp20.xml +9 -0
- data/tests/wellformed/amp/amp21.xml +9 -0
- data/tests/wellformed/amp/amp22.xml +9 -0
- data/tests/wellformed/amp/amp23.xml +9 -0
- data/tests/wellformed/amp/amp24.xml +9 -0
- data/tests/wellformed/amp/amp25.xml +9 -0
- data/tests/wellformed/amp/amp26.xml +9 -0
- data/tests/wellformed/amp/amp27.xml +9 -0
- data/tests/wellformed/amp/amp28.xml +9 -0
- data/tests/wellformed/amp/amp29.xml +9 -0
- data/tests/wellformed/amp/amp30.xml +9 -0
- data/tests/wellformed/amp/amp31.xml +9 -0
- data/tests/wellformed/amp/amp32.xml +9 -0
- data/tests/wellformed/amp/amp33.xml +9 -0
- data/tests/wellformed/amp/amp34.xml +9 -0
- data/tests/wellformed/amp/amp35.xml +9 -0
- data/tests/wellformed/amp/amp36.xml +9 -0
- data/tests/wellformed/amp/amp37.xml +9 -0
- data/tests/wellformed/amp/amp38.xml +9 -0
- data/tests/wellformed/amp/amp39.xml +9 -0
- data/tests/wellformed/amp/amp40.xml +9 -0
- data/tests/wellformed/amp/amp41.xml +9 -0
- data/tests/wellformed/amp/amp42.xml +9 -0
- data/tests/wellformed/amp/amp43.xml +9 -0
- data/tests/wellformed/amp/amp44.xml +9 -0
- data/tests/wellformed/amp/amp45.xml +9 -0
- data/tests/wellformed/amp/amp46.xml +9 -0
- data/tests/wellformed/amp/amp47.xml +9 -0
- data/tests/wellformed/amp/amp48.xml +9 -0
- data/tests/wellformed/amp/amp49.xml +9 -0
- data/tests/wellformed/amp/amp50.xml +9 -0
- data/tests/wellformed/amp/amp51.xml +9 -0
- data/tests/wellformed/amp/amp52.xml +9 -0
- data/tests/wellformed/amp/amp53.xml +9 -0
- data/tests/wellformed/amp/amp54.xml +9 -0
- data/tests/wellformed/amp/amp55.xml +9 -0
- data/tests/wellformed/amp/amp56.xml +9 -0
- data/tests/wellformed/amp/amp57.xml +9 -0
- data/tests/wellformed/amp/amp58.xml +9 -0
- data/tests/wellformed/amp/amp59.xml +9 -0
- data/tests/wellformed/amp/amp60.xml +9 -0
- data/tests/wellformed/amp/amp61.xml +9 -0
- data/tests/wellformed/amp/amp62.xml +9 -0
- data/tests/wellformed/amp/amp63.xml +9 -0
- data/tests/wellformed/amp/amp64.xml +9 -0
- data/tests/wellformed/atom/atom_namespace_1.xml +7 -0
- data/tests/wellformed/atom/atom_namespace_2.xml +7 -0
- data/tests/wellformed/atom/atom_namespace_3.xml +7 -0
- data/tests/wellformed/atom/atom_namespace_4.xml +7 -0
- data/tests/wellformed/atom/atom_namespace_5.xml +7 -0
- data/tests/wellformed/atom/entry_author_email.xml +13 -0
- data/tests/wellformed/atom/entry_author_homepage.xml +13 -0
- data/tests/wellformed/atom/entry_author_map_author.xml +13 -0
- data/tests/wellformed/atom/entry_author_map_author_2.xml +12 -0
- data/tests/wellformed/atom/entry_author_name.xml +13 -0
- data/tests/wellformed/atom/entry_author_uri.xml +13 -0
- data/tests/wellformed/atom/entry_author_url.xml +13 -0
- data/tests/wellformed/atom/entry_content_mode_base64.xml +11 -0
- data/tests/wellformed/atom/entry_content_mode_escaped.xml +9 -0
- data/tests/wellformed/atom/entry_content_type.xml +9 -0
- data/tests/wellformed/atom/entry_content_type_text_plain.xml +9 -0
- data/tests/wellformed/atom/entry_content_value.xml +9 -0
- data/tests/wellformed/atom/entry_contributor_email.xml +13 -0
- data/tests/wellformed/atom/entry_contributor_homepage.xml +13 -0
- data/tests/wellformed/atom/entry_contributor_multiple.xml +18 -0
- data/tests/wellformed/atom/entry_contributor_name.xml +13 -0
- data/tests/wellformed/atom/entry_contributor_uri.xml +13 -0
- data/tests/wellformed/atom/entry_contributor_url.xml +13 -0
- data/tests/wellformed/atom/entry_id.xml +9 -0
- data/tests/wellformed/atom/entry_id_map_guid.xml +9 -0
- data/tests/wellformed/atom/entry_link_alternate_map_link.xml +9 -0
- data/tests/wellformed/atom/entry_link_alternate_map_link_2.xml +9 -0
- data/tests/wellformed/atom/entry_link_href.xml +9 -0
- data/tests/wellformed/atom/entry_link_multiple.xml +10 -0
- data/tests/wellformed/atom/entry_link_rel.xml +9 -0
- data/tests/wellformed/atom/entry_link_title.xml +9 -0
- data/tests/wellformed/atom/entry_link_type.xml +9 -0
- data/tests/wellformed/atom/entry_summary.xml +9 -0
- data/tests/wellformed/atom/entry_summary_base64.xml +11 -0
- data/tests/wellformed/atom/entry_summary_base64_2.xml +11 -0
- data/tests/wellformed/atom/entry_summary_content_mode_base64.xml +11 -0
- data/tests/wellformed/atom/entry_summary_content_mode_escaped.xml +9 -0
- data/tests/wellformed/atom/entry_summary_content_type.xml +9 -0
- data/tests/wellformed/atom/entry_summary_content_type_text_plain.xml +9 -0
- data/tests/wellformed/atom/entry_summary_content_value.xml +9 -0
- data/tests/wellformed/atom/entry_summary_escaped_markup.xml +9 -0
- data/tests/wellformed/atom/entry_summary_inline_markup.xml +9 -0
- data/tests/wellformed/atom/entry_summary_inline_markup_2.xml +9 -0
- data/tests/wellformed/atom/entry_summary_naked_markup.xml +9 -0
- data/tests/wellformed/atom/entry_summary_text_plain.xml +9 -0
- data/tests/wellformed/atom/entry_title.xml +9 -0
- data/tests/wellformed/atom/entry_title_base64.xml +11 -0
- data/tests/wellformed/atom/entry_title_base64_2.xml +11 -0
- data/tests/wellformed/atom/entry_title_content_mode_base64.xml +11 -0
- data/tests/wellformed/atom/entry_title_content_mode_escaped.xml +9 -0
- data/tests/wellformed/atom/entry_title_content_type.xml +9 -0
- data/tests/wellformed/atom/entry_title_content_type_text_plain.xml +9 -0
- data/tests/wellformed/atom/entry_title_content_value.xml +9 -0
- data/tests/wellformed/atom/entry_title_escaped_markup.xml +9 -0
- data/tests/wellformed/atom/entry_title_inline_markup.xml +9 -0
- data/tests/wellformed/atom/entry_title_inline_markup_2.xml +9 -0
- data/tests/wellformed/atom/entry_title_naked_markup.xml +9 -0
- data/tests/wellformed/atom/entry_title_text_plain.xml +9 -0
- data/tests/wellformed/atom/entry_title_text_plain_brackets.xml +9 -0
- data/tests/wellformed/atom/feed_author_email.xml +11 -0
- data/tests/wellformed/atom/feed_author_homepage.xml +11 -0
- data/tests/wellformed/atom/feed_author_map_author.xml +11 -0
- data/tests/wellformed/atom/feed_author_map_author_2.xml +10 -0
- data/tests/wellformed/atom/feed_author_name.xml +11 -0
- data/tests/wellformed/atom/feed_author_uri.xml +11 -0
- data/tests/wellformed/atom/feed_author_url.xml +11 -0
- data/tests/wellformed/atom/feed_contributor_email.xml +11 -0
- data/tests/wellformed/atom/feed_contributor_homepage.xml +11 -0
- data/tests/wellformed/atom/feed_contributor_multiple.xml +16 -0
- data/tests/wellformed/atom/feed_contributor_name.xml +11 -0
- data/tests/wellformed/atom/feed_contributor_uri.xml +11 -0
- data/tests/wellformed/atom/feed_contributor_url.xml +11 -0
- data/tests/wellformed/atom/feed_copyright.xml +7 -0
- data/tests/wellformed/atom/feed_copyright_base64.xml +9 -0
- data/tests/wellformed/atom/feed_copyright_base64_2.xml +9 -0
- data/tests/wellformed/atom/feed_copyright_content_mode_base64.xml +9 -0
- data/tests/wellformed/atom/feed_copyright_content_mode_escaped.xml +7 -0
- data/tests/wellformed/atom/feed_copyright_content_type.xml +7 -0
- data/tests/wellformed/atom/feed_copyright_content_type_text_plain.xml +7 -0
- data/tests/wellformed/atom/feed_copyright_content_value.xml +7 -0
- data/tests/wellformed/atom/feed_copyright_escaped_markup.xml +7 -0
- data/tests/wellformed/atom/feed_copyright_inline_markup.xml +7 -0
- data/tests/wellformed/atom/feed_copyright_inline_markup_2.xml +7 -0
- data/tests/wellformed/atom/feed_copyright_naked_markup.xml +7 -0
- data/tests/wellformed/atom/feed_copyright_text_plain.xml +7 -0
- data/tests/wellformed/atom/feed_generator.xml +7 -0
- data/tests/wellformed/atom/feed_generator_name.xml +7 -0
- data/tests/wellformed/atom/feed_generator_url.xml +7 -0
- data/tests/wellformed/atom/feed_generator_version.xml +7 -0
- data/tests/wellformed/atom/feed_id.xml +7 -0
- data/tests/wellformed/atom/feed_id_map_guid.xml +7 -0
- data/tests/wellformed/atom/feed_info.xml +7 -0
- data/tests/wellformed/atom/feed_info_base64.xml +9 -0
- data/tests/wellformed/atom/feed_info_base64_2.xml +9 -0
- data/tests/wellformed/atom/feed_info_content_mode_base64.xml +9 -0
- data/tests/wellformed/atom/feed_info_content_mode_escaped.xml +7 -0
- data/tests/wellformed/atom/feed_info_content_type.xml +7 -0
- data/tests/wellformed/atom/feed_info_content_type_text_plain.xml +7 -0
- data/tests/wellformed/atom/feed_info_content_value.xml +7 -0
- data/tests/wellformed/atom/feed_info_escaped_markup.xml +7 -0
- data/tests/wellformed/atom/feed_info_inline_markup.xml +7 -0
- data/tests/wellformed/atom/feed_info_inline_markup_2.xml +7 -0
- data/tests/wellformed/atom/feed_info_naked_markup.xml +7 -0
- data/tests/wellformed/atom/feed_info_text_plain.xml +7 -0
- data/tests/wellformed/atom/feed_link_alternate_map_link.xml +7 -0
- data/tests/wellformed/atom/feed_link_alternate_map_link_2.xml +7 -0
- data/tests/wellformed/atom/feed_link_href.xml +7 -0
- data/tests/wellformed/atom/feed_link_multiple.xml +8 -0
- data/tests/wellformed/atom/feed_link_rel.xml +7 -0
- data/tests/wellformed/atom/feed_link_title.xml +7 -0
- data/tests/wellformed/atom/feed_link_type.xml +7 -0
- data/tests/wellformed/atom/feed_tagline.xml +7 -0
- data/tests/wellformed/atom/feed_tagline_base64.xml +9 -0
- data/tests/wellformed/atom/feed_tagline_base64_2.xml +9 -0
- data/tests/wellformed/atom/feed_tagline_content_mode_base64.xml +9 -0
- data/tests/wellformed/atom/feed_tagline_content_mode_escaped.xml +7 -0
- data/tests/wellformed/atom/feed_tagline_content_type.xml +7 -0
- data/tests/wellformed/atom/feed_tagline_content_type_text_plain.xml +7 -0
- data/tests/wellformed/atom/feed_tagline_content_value.xml +7 -0
- data/tests/wellformed/atom/feed_tagline_escaped_markup.xml +7 -0
- data/tests/wellformed/atom/feed_tagline_inline_markup.xml +7 -0
- data/tests/wellformed/atom/feed_tagline_inline_markup_2.xml +7 -0
- data/tests/wellformed/atom/feed_tagline_naked_markup.xml +7 -0
- data/tests/wellformed/atom/feed_tagline_text_plain.xml +7 -0
- data/tests/wellformed/atom/feed_title.xml +7 -0
- data/tests/wellformed/atom/feed_title_base64.xml +9 -0
- data/tests/wellformed/atom/feed_title_base64_2.xml +9 -0
- data/tests/wellformed/atom/feed_title_content_mode_base64.xml +9 -0
- data/tests/wellformed/atom/feed_title_content_mode_escaped.xml +7 -0
- data/tests/wellformed/atom/feed_title_content_type.xml +7 -0
- data/tests/wellformed/atom/feed_title_content_type_text_plain.xml +7 -0
- data/tests/wellformed/atom/feed_title_content_value.xml +7 -0
- data/tests/wellformed/atom/feed_title_escaped_markup.xml +7 -0
- data/tests/wellformed/atom/feed_title_inline_markup.xml +7 -0
- data/tests/wellformed/atom/feed_title_inline_markup_2.xml +7 -0
- data/tests/wellformed/atom/feed_title_naked_markup.xml +7 -0
- data/tests/wellformed/atom/feed_title_text_plain.xml +7 -0
- data/tests/wellformed/atom/relative_uri.xml +7 -0
- data/tests/wellformed/atom/relative_uri_inherit.xml +7 -0
- data/tests/wellformed/atom/relative_uri_inherit_2.xml +7 -0
- data/tests/wellformed/atom10/atom10_namespace.xml +7 -0
- data/tests/wellformed/atom10/atom10_version.xml +6 -0
- data/tests/wellformed/atom10/entry_author_email.xml +13 -0
- data/tests/wellformed/atom10/entry_author_map_author.xml +13 -0
- data/tests/wellformed/atom10/entry_author_map_author_2.xml +12 -0
- data/tests/wellformed/atom10/entry_author_name.xml +13 -0
- data/tests/wellformed/atom10/entry_author_uri.xml +13 -0
- data/tests/wellformed/atom10/entry_author_url.xml +13 -0
- data/tests/wellformed/atom10/entry_category_label.xml +9 -0
- data/tests/wellformed/atom10/entry_category_scheme.xml +9 -0
- data/tests/wellformed/atom10/entry_category_term.xml +9 -0
- data/tests/wellformed/atom10/entry_content_application_xml.xml +9 -0
- data/tests/wellformed/atom10/entry_content_base64.xml +11 -0
- data/tests/wellformed/atom10/entry_content_base64_2.xml +11 -0
- data/tests/wellformed/atom10/entry_content_escaped_markup.xml +9 -0
- data/tests/wellformed/atom10/entry_content_inline_markup.xml +9 -0
- data/tests/wellformed/atom10/entry_content_inline_markup_2.xml +9 -0
- data/tests/wellformed/atom10/entry_content_src.xml +9 -0
- data/tests/wellformed/atom10/entry_content_text_plain.xml +9 -0
- data/tests/wellformed/atom10/entry_content_text_plain_brackets.xml +9 -0
- data/tests/wellformed/atom10/entry_content_type.xml +9 -0
- data/tests/wellformed/atom10/entry_content_type_text.xml +9 -0
- data/tests/wellformed/atom10/entry_content_value.xml +9 -0
- data/tests/wellformed/atom10/entry_contributor_email.xml +13 -0
- data/tests/wellformed/atom10/entry_contributor_multiple.xml +18 -0
- data/tests/wellformed/atom10/entry_contributor_name.xml +13 -0
- data/tests/wellformed/atom10/entry_contributor_uri.xml +13 -0
- data/tests/wellformed/atom10/entry_contributor_url.xml +13 -0
- data/tests/wellformed/atom10/entry_id.xml +9 -0
- data/tests/wellformed/atom10/entry_id_map_guid.xml +9 -0
- data/tests/wellformed/atom10/entry_id_no_normalization_1.xml +9 -0
- data/tests/wellformed/atom10/entry_id_no_normalization_2.xml +9 -0
- data/tests/wellformed/atom10/entry_id_no_normalization_3.xml +9 -0
- data/tests/wellformed/atom10/entry_id_no_normalization_4.xml +9 -0
- data/tests/wellformed/atom10/entry_id_no_normalization_5.xml +9 -0
- data/tests/wellformed/atom10/entry_id_no_normalization_6.xml +9 -0
- data/tests/wellformed/atom10/entry_id_no_normalization_7.xml +9 -0
- data/tests/wellformed/atom10/entry_link_alternate_map_link.xml +9 -0
- data/tests/wellformed/atom10/entry_link_alternate_map_link_2.xml +9 -0
- data/tests/wellformed/atom10/entry_link_alternate_map_link_3.xml +11 -0
- data/tests/wellformed/atom10/entry_link_href.xml +9 -0
- data/tests/wellformed/atom10/entry_link_hreflang.xml +9 -0
- data/tests/wellformed/atom10/entry_link_length.xml +9 -0
- data/tests/wellformed/atom10/entry_link_multiple.xml +10 -0
- data/tests/wellformed/atom10/entry_link_no_rel.xml +9 -0
- data/tests/wellformed/atom10/entry_link_rel.xml +9 -0
- data/tests/wellformed/atom10/entry_link_rel_enclosure.xml +9 -0
- data/tests/wellformed/atom10/entry_link_rel_enclosure_map_enclosure_length.xml +9 -0
- data/tests/wellformed/atom10/entry_link_rel_enclosure_map_enclosure_type.xml +9 -0
- data/tests/wellformed/atom10/entry_link_rel_enclosure_map_enclosure_url.xml +9 -0
- data/tests/wellformed/atom10/entry_link_rel_other.xml +9 -0
- data/tests/wellformed/atom10/entry_link_rel_related.xml +9 -0
- data/tests/wellformed/atom10/entry_link_rel_self.xml +9 -0
- data/tests/wellformed/atom10/entry_link_rel_via.xml +9 -0
- data/tests/wellformed/atom10/entry_link_title.xml +9 -0
- data/tests/wellformed/atom10/entry_link_type.xml +9 -0
- data/tests/wellformed/atom10/entry_rights.xml +9 -0
- data/tests/wellformed/atom10/entry_rights_content_value.xml +9 -0
- data/tests/wellformed/atom10/entry_rights_escaped_markup.xml +9 -0
- data/tests/wellformed/atom10/entry_rights_inline_markup.xml +9 -0
- data/tests/wellformed/atom10/entry_rights_inline_markup_2.xml +9 -0
- data/tests/wellformed/atom10/entry_rights_text_plain.xml +9 -0
- data/tests/wellformed/atom10/entry_rights_text_plain_brackets.xml +9 -0
- data/tests/wellformed/atom10/entry_rights_type_default.xml +9 -0
- data/tests/wellformed/atom10/entry_rights_type_text.xml +9 -0
- data/tests/wellformed/atom10/entry_source_author_email.xml +15 -0
- data/tests/wellformed/atom10/entry_source_author_map_author.xml +15 -0
- data/tests/wellformed/atom10/entry_source_author_map_author_2.xml +14 -0
- data/tests/wellformed/atom10/entry_source_author_name.xml +15 -0
- data/tests/wellformed/atom10/entry_source_author_uri.xml +15 -0
- data/tests/wellformed/atom10/entry_source_category_label.xml +11 -0
- data/tests/wellformed/atom10/entry_source_category_scheme.xml +11 -0
- data/tests/wellformed/atom10/entry_source_category_term.xml +11 -0
- data/tests/wellformed/atom10/entry_source_contributor_email.xml +15 -0
- data/tests/wellformed/atom10/entry_source_contributor_multiple.xml +20 -0
- data/tests/wellformed/atom10/entry_source_contributor_name.xml +15 -0
- data/tests/wellformed/atom10/entry_source_contributor_uri.xml +15 -0
- data/tests/wellformed/atom10/entry_source_generator.xml +11 -0
- data/tests/wellformed/atom10/entry_source_generator_name.xml +11 -0
- data/tests/wellformed/atom10/entry_source_generator_uri.xml +11 -0
- data/tests/wellformed/atom10/entry_source_generator_version.xml +11 -0
- data/tests/wellformed/atom10/entry_source_icon.xml +11 -0
- data/tests/wellformed/atom10/entry_source_id.xml +11 -0
- data/tests/wellformed/atom10/entry_source_link_alternate_map_link.xml +11 -0
- data/tests/wellformed/atom10/entry_source_link_alternate_map_link_2.xml +11 -0
- data/tests/wellformed/atom10/entry_source_link_href.xml +11 -0
- data/tests/wellformed/atom10/entry_source_link_hreflang.xml +11 -0
- data/tests/wellformed/atom10/entry_source_link_length.xml +11 -0
- data/tests/wellformed/atom10/entry_source_link_multiple.xml +12 -0
- data/tests/wellformed/atom10/entry_source_link_no_rel.xml +11 -0
- data/tests/wellformed/atom10/entry_source_link_rel.xml +11 -0
- data/tests/wellformed/atom10/entry_source_link_rel_other.xml +11 -0
- data/tests/wellformed/atom10/entry_source_link_rel_related.xml +11 -0
- data/tests/wellformed/atom10/entry_source_link_rel_self.xml +11 -0
- data/tests/wellformed/atom10/entry_source_link_rel_via.xml +11 -0
- data/tests/wellformed/atom10/entry_source_link_title.xml +11 -0
- data/tests/wellformed/atom10/entry_source_link_type.xml +11 -0
- data/tests/wellformed/atom10/entry_source_logo.xml +11 -0
- data/tests/wellformed/atom10/entry_source_rights.xml +11 -0
- data/tests/wellformed/atom10/entry_source_rights_base64.xml +13 -0
- data/tests/wellformed/atom10/entry_source_rights_base64_2.xml +13 -0
- data/tests/wellformed/atom10/entry_source_rights_content_type.xml +11 -0
- data/tests/wellformed/atom10/entry_source_rights_content_type_text.xml +11 -0
- data/tests/wellformed/atom10/entry_source_rights_content_value.xml +11 -0
- data/tests/wellformed/atom10/entry_source_rights_escaped_markup.xml +11 -0
- data/tests/wellformed/atom10/entry_source_rights_inline_markup.xml +11 -0
- data/tests/wellformed/atom10/entry_source_rights_inline_markup_2.xml +11 -0
- data/tests/wellformed/atom10/entry_source_rights_text_plain.xml +11 -0
- data/tests/wellformed/atom10/entry_source_subittle_content_type_text.xml +11 -0
- data/tests/wellformed/atom10/entry_source_subtitle.xml +11 -0
- data/tests/wellformed/atom10/entry_source_subtitle_base64.xml +13 -0
- data/tests/wellformed/atom10/entry_source_subtitle_base64_2.xml +13 -0
- data/tests/wellformed/atom10/entry_source_subtitle_content_type.xml +11 -0
- data/tests/wellformed/atom10/entry_source_subtitle_content_value.xml +11 -0
- data/tests/wellformed/atom10/entry_source_subtitle_escaped_markup.xml +11 -0
- data/tests/wellformed/atom10/entry_source_subtitle_inline_markup.xml +11 -0
- data/tests/wellformed/atom10/entry_source_subtitle_inline_markup_2.xml +11 -0
- data/tests/wellformed/atom10/entry_source_subtitle_text_plain.xml +11 -0
- data/tests/wellformed/atom10/entry_source_title.xml +11 -0
- data/tests/wellformed/atom10/entry_source_title_base64.xml +13 -0
- data/tests/wellformed/atom10/entry_source_title_base64_2.xml +13 -0
- data/tests/wellformed/atom10/entry_source_title_content_type.xml +11 -0
- data/tests/wellformed/atom10/entry_source_title_content_type_text.xml +11 -0
- data/tests/wellformed/atom10/entry_source_title_content_value.xml +11 -0
- data/tests/wellformed/atom10/entry_source_title_escaped_markup.xml +11 -0
- data/tests/wellformed/atom10/entry_source_title_inline_markup.xml +11 -0
- data/tests/wellformed/atom10/entry_source_title_inline_markup_2.xml +11 -0
- data/tests/wellformed/atom10/entry_source_title_text_plain.xml +11 -0
- data/tests/wellformed/atom10/entry_summary.xml +9 -0
- data/tests/wellformed/atom10/entry_summary_base64.xml +11 -0
- data/tests/wellformed/atom10/entry_summary_base64_2.xml +11 -0
- data/tests/wellformed/atom10/entry_summary_content_value.xml +9 -0
- data/tests/wellformed/atom10/entry_summary_escaped_markup.xml +9 -0
- data/tests/wellformed/atom10/entry_summary_inline_markup.xml +9 -0
- data/tests/wellformed/atom10/entry_summary_inline_markup_2.xml +9 -0
- data/tests/wellformed/atom10/entry_summary_text_plain.xml +9 -0
- data/tests/wellformed/atom10/entry_summary_type_default.xml +9 -0
- data/tests/wellformed/atom10/entry_summary_type_text.xml +9 -0
- data/tests/wellformed/atom10/entry_title.xml +9 -0
- data/tests/wellformed/atom10/entry_title_base64.xml +11 -0
- data/tests/wellformed/atom10/entry_title_base64_2.xml +11 -0
- data/tests/wellformed/atom10/entry_title_content_value.xml +9 -0
- data/tests/wellformed/atom10/entry_title_escaped_markup.xml +9 -0
- data/tests/wellformed/atom10/entry_title_inline_markup.xml +9 -0
- data/tests/wellformed/atom10/entry_title_inline_markup_2.xml +9 -0
- data/tests/wellformed/atom10/entry_title_text_plain.xml +9 -0
- data/tests/wellformed/atom10/entry_title_text_plain_brackets.xml +9 -0
- data/tests/wellformed/atom10/entry_title_type_default.xml +9 -0
- data/tests/wellformed/atom10/entry_title_type_text.xml +9 -0
- data/tests/wellformed/atom10/feed_author_email.xml +11 -0
- data/tests/wellformed/atom10/feed_author_map_author.xml +11 -0
- data/tests/wellformed/atom10/feed_author_map_author_2.xml +10 -0
- data/tests/wellformed/atom10/feed_author_name.xml +11 -0
- data/tests/wellformed/atom10/feed_author_uri.xml +11 -0
- data/tests/wellformed/atom10/feed_author_url.xml +11 -0
- data/tests/wellformed/atom10/feed_contributor_email.xml +11 -0
- data/tests/wellformed/atom10/feed_contributor_multiple.xml +16 -0
- data/tests/wellformed/atom10/feed_contributor_name.xml +11 -0
- data/tests/wellformed/atom10/feed_contributor_uri.xml +11 -0
- data/tests/wellformed/atom10/feed_contributor_url.xml +11 -0
- data/tests/wellformed/atom10/feed_generator.xml +7 -0
- data/tests/wellformed/atom10/feed_generator_name.xml +7 -0
- data/tests/wellformed/atom10/feed_generator_url.xml +7 -0
- data/tests/wellformed/atom10/feed_generator_version.xml +7 -0
- data/tests/wellformed/atom10/feed_icon.xml +7 -0
- data/tests/wellformed/atom10/feed_id.xml +7 -0
- data/tests/wellformed/atom10/feed_id_map_guid.xml +7 -0
- data/tests/wellformed/atom10/feed_link_alternate_map_link.xml +7 -0
- data/tests/wellformed/atom10/feed_link_alternate_map_link_2.xml +7 -0
- data/tests/wellformed/atom10/feed_link_href.xml +7 -0
- data/tests/wellformed/atom10/feed_link_hreflang.xml +7 -0
- data/tests/wellformed/atom10/feed_link_length.xml +7 -0
- data/tests/wellformed/atom10/feed_link_multiple.xml +8 -0
- data/tests/wellformed/atom10/feed_link_no_rel.xml +7 -0
- data/tests/wellformed/atom10/feed_link_rel.xml +7 -0
- data/tests/wellformed/atom10/feed_link_rel_other.xml +7 -0
- data/tests/wellformed/atom10/feed_link_rel_related.xml +7 -0
- data/tests/wellformed/atom10/feed_link_rel_self.xml +7 -0
- data/tests/wellformed/atom10/feed_link_rel_via.xml +7 -0
- data/tests/wellformed/atom10/feed_link_title.xml +7 -0
- data/tests/wellformed/atom10/feed_link_type.xml +7 -0
- data/tests/wellformed/atom10/feed_logo.xml +7 -0
- data/tests/wellformed/atom10/feed_rights.xml +7 -0
- data/tests/wellformed/atom10/feed_rights_base64.xml +9 -0
- data/tests/wellformed/atom10/feed_rights_base64_2.xml +9 -0
- data/tests/wellformed/atom10/feed_rights_content_type.xml +7 -0
- data/tests/wellformed/atom10/feed_rights_content_type_text.xml +7 -0
- data/tests/wellformed/atom10/feed_rights_content_value.xml +7 -0
- data/tests/wellformed/atom10/feed_rights_escaped_markup.xml +7 -0
- data/tests/wellformed/atom10/feed_rights_inline_markup.xml +7 -0
- data/tests/wellformed/atom10/feed_rights_inline_markup_2.xml +7 -0
- data/tests/wellformed/atom10/feed_rights_text_plain.xml +7 -0
- data/tests/wellformed/atom10/feed_subtitle.xml +7 -0
- data/tests/wellformed/atom10/feed_subtitle_base64.xml +9 -0
- data/tests/wellformed/atom10/feed_subtitle_base64_2.xml +9 -0
- data/tests/wellformed/atom10/feed_subtitle_content_type.xml +7 -0
- data/tests/wellformed/atom10/feed_subtitle_content_type_text.xml +7 -0
- data/tests/wellformed/atom10/feed_subtitle_content_value.xml +7 -0
- data/tests/wellformed/atom10/feed_subtitle_escaped_markup.xml +7 -0
- data/tests/wellformed/atom10/feed_subtitle_inline_markup.xml +7 -0
- data/tests/wellformed/atom10/feed_subtitle_inline_markup_2.xml +7 -0
- data/tests/wellformed/atom10/feed_subtitle_text_plain.xml +7 -0
- data/tests/wellformed/atom10/feed_title.xml +7 -0
- data/tests/wellformed/atom10/feed_title_base64.xml +9 -0
- data/tests/wellformed/atom10/feed_title_base64_2.xml +9 -0
- data/tests/wellformed/atom10/feed_title_content_type.xml +7 -0
- data/tests/wellformed/atom10/feed_title_content_type_text.xml +7 -0
- data/tests/wellformed/atom10/feed_title_content_value.xml +7 -0
- data/tests/wellformed/atom10/feed_title_escaped_markup.xml +7 -0
- data/tests/wellformed/atom10/feed_title_inline_markup.xml +7 -0
- data/tests/wellformed/atom10/feed_title_inline_markup_2.xml +7 -0
- data/tests/wellformed/atom10/feed_title_text_plain.xml +7 -0
- data/tests/wellformed/atom10/relative_uri.xml +7 -0
- data/tests/wellformed/atom10/relative_uri_inherit.xml +7 -0
- data/tests/wellformed/atom10/relative_uri_inherit_2.xml +7 -0
- data/tests/wellformed/base/cdf_item_abstract_xml_base.xml +18 -0
- data/tests/wellformed/base/entry_content_xml_base.xml +9 -0
- data/tests/wellformed/base/entry_content_xml_base_inherit.xml +9 -0
- data/tests/wellformed/base/entry_content_xml_base_inherit_2.xml +9 -0
- data/tests/wellformed/base/entry_content_xml_base_inherit_3.xml +10 -0
- data/tests/wellformed/base/entry_content_xml_base_inherit_4.xml +10 -0
- data/tests/wellformed/base/entry_summary_xml_base.xml +9 -0
- data/tests/wellformed/base/entry_summary_xml_base_inherit.xml +9 -0
- data/tests/wellformed/base/entry_summary_xml_base_inherit_2.xml +9 -0
- data/tests/wellformed/base/entry_summary_xml_base_inherit_3.xml +10 -0
- data/tests/wellformed/base/entry_summary_xml_base_inherit_4.xml +10 -0
- data/tests/wellformed/base/entry_title_xml_base.xml +9 -0
- data/tests/wellformed/base/entry_title_xml_base_inherit.xml +9 -0
- data/tests/wellformed/base/entry_title_xml_base_inherit_2.xml +9 -0
- data/tests/wellformed/base/entry_title_xml_base_inherit_3.xml +10 -0
- data/tests/wellformed/base/entry_title_xml_base_inherit_4.xml +10 -0
- data/tests/wellformed/base/feed_copyright_xml_base.xml +7 -0
- data/tests/wellformed/base/feed_copyright_xml_base_inherit.xml +7 -0
- data/tests/wellformed/base/feed_copyright_xml_base_inherit_2.xml +7 -0
- data/tests/wellformed/base/feed_copyright_xml_base_inherit_3.xml +8 -0
- data/tests/wellformed/base/feed_copyright_xml_base_inherit_4.xml +8 -0
- data/tests/wellformed/base/feed_info_xml_base.xml +7 -0
- data/tests/wellformed/base/feed_info_xml_base_inherit.xml +7 -0
- data/tests/wellformed/base/feed_info_xml_base_inherit_2.xml +7 -0
- data/tests/wellformed/base/feed_info_xml_base_inherit_3.xml +8 -0
- data/tests/wellformed/base/feed_info_xml_base_inherit_4.xml +8 -0
- data/tests/wellformed/base/feed_tagline_xml_base.xml +7 -0
- data/tests/wellformed/base/feed_tagline_xml_base_inherit.xml +7 -0
- data/tests/wellformed/base/feed_tagline_xml_base_inherit_2.xml +7 -0
- data/tests/wellformed/base/feed_tagline_xml_base_inherit_3.xml +8 -0
- data/tests/wellformed/base/feed_tagline_xml_base_inherit_4.xml +8 -0
- data/tests/wellformed/base/feed_title_xml_base.xml +7 -0
- data/tests/wellformed/base/feed_title_xml_base_inherit.xml +7 -0
- data/tests/wellformed/base/feed_title_xml_base_inherit_2.xml +7 -0
- data/tests/wellformed/base/feed_title_xml_base_inherit_3.xml +8 -0
- data/tests/wellformed/base/feed_title_xml_base_inherit_4.xml +8 -0
- data/tests/wellformed/base/http_channel_docs_base_content_location.xml +10 -0
- data/tests/wellformed/base/http_channel_docs_base_docuri.xml +9 -0
- data/tests/wellformed/base/http_channel_link_base_content_location.xml +10 -0
- data/tests/wellformed/base/http_channel_link_base_docuri.xml +9 -0
- data/tests/wellformed/base/http_entry_author_url_base_content_location.xml +12 -0
- data/tests/wellformed/base/http_entry_author_url_base_docuri.xml +11 -0
- data/tests/wellformed/base/http_entry_content_base64_base_content_location.xml +12 -0
- data/tests/wellformed/base/http_entry_content_base64_base_docuri.xml +11 -0
- data/tests/wellformed/base/http_entry_content_base_content_location.xml +10 -0
- data/tests/wellformed/base/http_entry_content_base_docuri.xml +9 -0
- data/tests/wellformed/base/http_entry_content_inline_base_content_location.xml +10 -0
- data/tests/wellformed/base/http_entry_content_inline_base_docuri.xml +9 -0
- data/tests/wellformed/base/http_entry_contributor_url_base_content_location.xml +12 -0
- data/tests/wellformed/base/http_entry_contributor_url_base_docuri.xml +11 -0
- data/tests/wellformed/base/http_entry_id_base_content_location.xml +10 -0
- data/tests/wellformed/base/http_entry_id_base_docuri.xml +9 -0
- data/tests/wellformed/base/http_entry_link_base_content_location.xml +10 -0
- data/tests/wellformed/base/http_entry_link_base_docuri.xml +9 -0
- data/tests/wellformed/base/http_entry_summary_base64_base_content_location.xml +12 -0
- data/tests/wellformed/base/http_entry_summary_base64_base_docuri.xml +11 -0
- data/tests/wellformed/base/http_entry_summary_base_content_location.xml +10 -0
- data/tests/wellformed/base/http_entry_summary_base_docuri.xml +9 -0
- data/tests/wellformed/base/http_entry_summary_inline_base_content_location.xml +10 -0
- data/tests/wellformed/base/http_entry_summary_inline_base_docuri.xml +9 -0
- data/tests/wellformed/base/http_entry_title_base64_base_content_location.xml +12 -0
- data/tests/wellformed/base/http_entry_title_base64_base_docuri.xml +11 -0
- data/tests/wellformed/base/http_entry_title_base_content_location.xml +10 -0
- data/tests/wellformed/base/http_entry_title_base_docuri.xml +9 -0
- data/tests/wellformed/base/http_entry_title_inline_base_content_location.xml +10 -0
- data/tests/wellformed/base/http_entry_title_inline_base_docuri.xml +9 -0
- data/tests/wellformed/base/http_feed_author_url_base_content_location.xml +10 -0
- data/tests/wellformed/base/http_feed_author_url_base_docuri.xml +9 -0
- data/tests/wellformed/base/http_feed_contributor_url_base_content_location.xml +10 -0
- data/tests/wellformed/base/http_feed_contributor_url_base_docuri.xml +9 -0
- data/tests/wellformed/base/http_feed_copyright_base64_base_content_location.xml +10 -0
- data/tests/wellformed/base/http_feed_copyright_base64_base_docuri.xml +9 -0
- data/tests/wellformed/base/http_feed_copyright_base_content_location.xml +8 -0
- data/tests/wellformed/base/http_feed_copyright_base_docuri.xml +7 -0
- data/tests/wellformed/base/http_feed_copyright_inline_base_content_location.xml +8 -0
- data/tests/wellformed/base/http_feed_copyright_inline_base_docuri.xml +7 -0
- data/tests/wellformed/base/http_feed_generator_url_base_content_location.xml +8 -0
- data/tests/wellformed/base/http_feed_generator_url_base_docuri.xml +7 -0
- data/tests/wellformed/base/http_feed_id_base_content_location.xml +8 -0
- data/tests/wellformed/base/http_feed_id_base_docuri.xml +7 -0
- data/tests/wellformed/base/http_feed_info_base64_base_content_location.xml +10 -0
- data/tests/wellformed/base/http_feed_info_base64_base_docuri.xml +9 -0
- data/tests/wellformed/base/http_feed_info_base_content_location.xml +8 -0
- data/tests/wellformed/base/http_feed_info_base_docuri.xml +7 -0
- data/tests/wellformed/base/http_feed_info_inline_base_content_location.xml +8 -0
- data/tests/wellformed/base/http_feed_info_inline_base_docuri.xml +7 -0
- data/tests/wellformed/base/http_feed_link_base_content_location.xml +8 -0
- data/tests/wellformed/base/http_feed_link_base_docuri.xml +7 -0
- data/tests/wellformed/base/http_feed_tagline_base64_base_content_location.xml +10 -0
- data/tests/wellformed/base/http_feed_tagline_base64_base_docuri.xml +9 -0
- data/tests/wellformed/base/http_feed_tagline_base_content_location.xml +8 -0
- data/tests/wellformed/base/http_feed_tagline_base_docuri.xml +7 -0
- data/tests/wellformed/base/http_feed_tagline_inline_base_content_location.xml +8 -0
- data/tests/wellformed/base/http_feed_tagline_inline_base_docuri.xml +7 -0
- data/tests/wellformed/base/http_feed_title_base64_base_content_location.xml +10 -0
- data/tests/wellformed/base/http_feed_title_base64_base_docuri.xml +9 -0
- data/tests/wellformed/base/http_feed_title_base_content_location.xml +8 -0
- data/tests/wellformed/base/http_feed_title_base_docuri.xml +7 -0
- data/tests/wellformed/base/http_feed_title_inline_base_content_location.xml +8 -0
- data/tests/wellformed/base/http_feed_title_inline_base_docuri.xml +7 -0
- data/tests/wellformed/base/http_item_body_base_content_location.xml +12 -0
- data/tests/wellformed/base/http_item_body_base_docuri.xml +11 -0
- data/tests/wellformed/base/http_item_comments_base_content_location.xml +12 -0
- data/tests/wellformed/base/http_item_comments_base_docuri.xml +11 -0
- data/tests/wellformed/base/http_item_content_encoded_base_content_location.xml +12 -0
- data/tests/wellformed/base/http_item_content_encoded_base_docuri.xml +11 -0
- data/tests/wellformed/base/http_item_description_base_content_location.xml +12 -0
- data/tests/wellformed/base/http_item_description_base_docuri.xml +11 -0
- data/tests/wellformed/base/http_item_fullitem_base_content_location.xml +12 -0
- data/tests/wellformed/base/http_item_fullitem_base_docuri.xml +11 -0
- data/tests/wellformed/base/http_item_link_base_content_location.xml +12 -0
- data/tests/wellformed/base/http_item_link_base_docuri.xml +11 -0
- data/tests/wellformed/base/http_item_wfw_commentRSS_base_content_location.xml +12 -0
- data/tests/wellformed/base/http_item_wfw_commentRSS_base_docuri.xml +11 -0
- data/tests/wellformed/base/http_item_wfw_comment_base_content_location.xml +12 -0
- data/tests/wellformed/base/http_item_wfw_comment_base_docuri.xml +11 -0
- data/tests/wellformed/base/http_item_xhtml_body_base_content_location.xml +12 -0
- data/tests/wellformed/base/http_item_xhtml_body_base_docuri.xml +11 -0
- data/tests/wellformed/base/http_relative_xml_base.xml +10 -0
- data/tests/wellformed/base/http_relative_xml_base_2.xml +10 -0
- data/tests/wellformed/base/malformed_base.xml +9 -0
- data/tests/wellformed/base/relative_xml_base.xml +9 -0
- data/tests/wellformed/base/relative_xml_base_2.xml +9 -0
- data/tests/wellformed/cdf/channel_abstract_map_description.xml +7 -0
- data/tests/wellformed/cdf/channel_abstract_map_tagline.xml +7 -0
- data/tests/wellformed/cdf/channel_href_map_link.xml +6 -0
- data/tests/wellformed/cdf/channel_href_map_links.xml +6 -0
- data/tests/wellformed/cdf/channel_title.xml +7 -0
- data/tests/wellformed/cdf/item_abstract_map_description.xml +9 -0
- data/tests/wellformed/cdf/item_abstract_map_summary.xml +9 -0
- data/tests/wellformed/cdf/item_href_map_link.xml +8 -0
- data/tests/wellformed/cdf/item_href_map_links.xml +8 -0
- data/tests/wellformed/cdf/item_title.xml +9 -0
- data/tests/wellformed/date/cdf_channel_lastmod_map_date.xml +6 -0
- data/tests/wellformed/date/cdf_channel_lastmod_map_modified.xml +6 -0
- data/tests/wellformed/date/cdf_channel_lastmod_map_modified_parsed.xml +6 -0
- data/tests/wellformed/date/cdf_item_lastmod_map_date.xml +8 -0
- data/tests/wellformed/date/cdf_item_lastmod_map_modified.xml +8 -0
- data/tests/wellformed/date/cdf_item_lastmod_map_modified_parsed.xml +8 -0
- data/tests/wellformed/date/channel_dc_date.xml +9 -0
- data/tests/wellformed/date/channel_dc_date_map_modified.xml +9 -0
- data/tests/wellformed/date/channel_dc_date_w3dtf_utc.xml +9 -0
- data/tests/wellformed/date/channel_dc_date_w3dtf_utc_map_modified_parsed.xml +9 -0
- data/tests/wellformed/date/channel_dcterms_created.xml +9 -0
- data/tests/wellformed/date/channel_dcterms_created_w3dtf_utc.xml +9 -0
- data/tests/wellformed/date/channel_dcterms_issued.xml +9 -0
- data/tests/wellformed/date/channel_dcterms_issued_w3dtf_utc.xml +9 -0
- data/tests/wellformed/date/channel_dcterms_modified.xml +9 -0
- data/tests/wellformed/date/channel_dcterms_modified_map_date.xml +9 -0
- data/tests/wellformed/date/channel_dcterms_modified_w3dtf_utc.xml +9 -0
- data/tests/wellformed/date/channel_dcterms_modified_w3dtf_utc_map_date.xml +9 -0
- data/tests/wellformed/date/channel_pubDate.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_asctime.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_disney.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_disney_at.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_disney_ct.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_disney_mt.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_disney_pt.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_greek_1.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_hungarian_1.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_iso8601_ym.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_iso8601_ym_2.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_iso8601_ymd.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_iso8601_ymd_2.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_iso8601_yo_2.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_korean_nate.xml +11 -0
- data/tests/wellformed/date/channel_pubDate_map_modified.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_mssql.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_mssql_nofraction.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_nosecond.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_notime.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_rfc2822.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_rfc2822_rollover_june_31.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_rfc822.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_w3dtf_rollover_25h.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_w3dtf_rollover_61m.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_w3dtf_rollover_61s.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_w3dtf_rollover_leapyear.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_w3dtf_rollover_leapyear400.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_w3dtf_rollover_nonleapyear.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_w3dtf_sf.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_w3dtf_tokyo.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_w3dtf_utc.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_w3dtf_y.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_w3dtf_ym.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_w3dtf_ymd.xml +9 -0
- data/tests/wellformed/date/channel_pubDate_w3dtf_ymd_2.xml +9 -0
- data/tests/wellformed/date/entry_created.xml +9 -0
- data/tests/wellformed/date/entry_created_w3dtf_utc.xml +9 -0
- data/tests/wellformed/date/entry_issued.xml +9 -0
- data/tests/wellformed/date/entry_issued_w3dtf_utc.xml +9 -0
- data/tests/wellformed/date/entry_modified.xml +9 -0
- data/tests/wellformed/date/entry_modified_map_date.xml +9 -0
- data/tests/wellformed/date/entry_modified_w3dtf_utc.xml +9 -0
- data/tests/wellformed/date/entry_published_w3dtf_utc.xml +9 -0
- data/tests/wellformed/date/entry_source_updated_w3dtf_utc.xml +11 -0
- data/tests/wellformed/date/entry_updated_w3dtf_utc.xml +9 -0
- data/tests/wellformed/date/feed_modified.xml +9 -0
- data/tests/wellformed/date/feed_modified_asctime.xml +9 -0
- data/tests/wellformed/date/feed_modified_disney.xml +7 -0
- data/tests/wellformed/date/feed_modified_disney_at.xml +7 -0
- data/tests/wellformed/date/feed_modified_disney_ct.xml +7 -0
- data/tests/wellformed/date/feed_modified_disney_mt.xml +7 -0
- data/tests/wellformed/date/feed_modified_disney_pt.xml +7 -0
- data/tests/wellformed/date/feed_modified_iso8601_ym.xml +9 -0
- data/tests/wellformed/date/feed_modified_iso8601_ym_2.xml +9 -0
- data/tests/wellformed/date/feed_modified_iso8601_ymd.xml +9 -0
- data/tests/wellformed/date/feed_modified_iso8601_ymd_2.xml +9 -0
- data/tests/wellformed/date/feed_modified_iso8601_yo_2.xml +9 -0
- data/tests/wellformed/date/feed_modified_map_date.xml +9 -0
- data/tests/wellformed/date/feed_modified_rfc2822.xml +9 -0
- data/tests/wellformed/date/feed_modified_rfc2822_rollover_june_31.xml +9 -0
- data/tests/wellformed/date/feed_modified_rfc822.xml +9 -0
- data/tests/wellformed/date/feed_modified_w3dtf_rollover_leapyear.xml +9 -0
- data/tests/wellformed/date/feed_modified_w3dtf_rollover_leapyear400.xml +9 -0
- data/tests/wellformed/date/feed_modified_w3dtf_rollover_nonleapyear.xml +9 -0
- data/tests/wellformed/date/feed_modified_w3dtf_sf.xml +9 -0
- data/tests/wellformed/date/feed_modified_w3dtf_tokyo.xml +9 -0
- data/tests/wellformed/date/feed_modified_w3dtf_utc.xml +9 -0
- data/tests/wellformed/date/feed_modified_w3dtf_y.xml +9 -0
- data/tests/wellformed/date/feed_modified_w3dtf_ym.xml +9 -0
- data/tests/wellformed/date/feed_modified_w3dtf_ymd.xml +9 -0
- data/tests/wellformed/date/feed_modified_w3dtf_ymd_2.xml +9 -0
- data/tests/wellformed/date/feed_updated_w3dtf_utc.xml +7 -0
- data/tests/wellformed/date/item_dc_date.xml +11 -0
- data/tests/wellformed/date/item_dc_date_map_modified.xml +11 -0
- data/tests/wellformed/date/item_dc_date_w3dtf_utc.xml +11 -0
- data/tests/wellformed/date/item_dc_date_w3dtf_utc_map_modified_parsed.xml +11 -0
- data/tests/wellformed/date/item_dcterms_created.xml +11 -0
- data/tests/wellformed/date/item_dcterms_created_w3dtf_utc.xml +11 -0
- data/tests/wellformed/date/item_dcterms_issued.xml +11 -0
- data/tests/wellformed/date/item_dcterms_issued_w3dtf_utc.xml +11 -0
- data/tests/wellformed/date/item_dcterms_modified.xml +11 -0
- data/tests/wellformed/date/item_dcterms_modified_map_date.xml +11 -0
- data/tests/wellformed/date/item_dcterms_modified_w3dtf_utc.xml +11 -0
- data/tests/wellformed/date/item_dcterms_modified_w3dtf_utc_map_date.xml +11 -0
- data/tests/wellformed/date/item_expirationDate.xml +11 -0
- data/tests/wellformed/date/item_expirationDate_rfc2822.xml +11 -0
- data/tests/wellformed/date/item_pubDate.xml +11 -0
- data/tests/wellformed/date/item_pubDate_euc-kr.xml +13 -0
- data/tests/wellformed/date/item_pubDate_map_modified.xml +11 -0
- data/tests/wellformed/date/item_pubDate_rfc2822.xml +11 -0
- data/tests/wellformed/encoding/big5.xml +8 -0
- data/tests/wellformed/encoding/csucs4.xml +0 -0
- data/tests/wellformed/encoding/csunicode.xml +0 -0
- data/tests/wellformed/encoding/encoding_attribute_crash.xml +9 -0
- data/tests/wellformed/encoding/encoding_attribute_crash_2.xml +9 -0
- data/tests/wellformed/encoding/euc-kr-attribute.xml +14 -0
- data/tests/wellformed/encoding/euc-kr-item.xml +14 -0
- data/tests/wellformed/encoding/euc-kr.xml +12 -0
- data/tests/wellformed/encoding/http_application_atom_xml_charset.xml +8 -0
- data/tests/wellformed/encoding/http_application_atom_xml_charset_overrides_encoding.xml +8 -0
- data/tests/wellformed/encoding/http_application_atom_xml_default.xml +8 -0
- data/tests/wellformed/encoding/http_application_atom_xml_encoding.xml +8 -0
- data/tests/wellformed/encoding/http_application_rss_xml_charset.xml +8 -0
- data/tests/wellformed/encoding/http_application_rss_xml_charset_overrides_encoding.xml +8 -0
- data/tests/wellformed/encoding/http_application_rss_xml_default.xml +8 -0
- data/tests/wellformed/encoding/http_application_rss_xml_encoding.xml +8 -0
- data/tests/wellformed/encoding/http_application_xml_charset.xml +8 -0
- data/tests/wellformed/encoding/http_application_xml_charset_overrides_encoding.xml +8 -0
- data/tests/wellformed/encoding/http_application_xml_default.xml +8 -0
- data/tests/wellformed/encoding/http_application_xml_dtd_charset.xml +8 -0
- data/tests/wellformed/encoding/http_application_xml_dtd_charset_overrides_encoding.xml +8 -0
- data/tests/wellformed/encoding/http_application_xml_dtd_default.xml +8 -0
- data/tests/wellformed/encoding/http_application_xml_dtd_encoding.xml +8 -0
- data/tests/wellformed/encoding/http_application_xml_encoding.xml +8 -0
- data/tests/wellformed/encoding/http_application_xml_epe_charset.xml +8 -0
- data/tests/wellformed/encoding/http_application_xml_epe_charset_overrides_encoding.xml +8 -0
- data/tests/wellformed/encoding/http_application_xml_epe_default.xml +8 -0
- data/tests/wellformed/encoding/http_application_xml_epe_encoding.xml +8 -0
- data/tests/wellformed/encoding/http_encoding_attribute_crash.xml +13 -0
- data/tests/wellformed/encoding/http_i18n.xml +13 -0
- data/tests/wellformed/encoding/http_text_atom_xml_charset.xml +8 -0
- data/tests/wellformed/encoding/http_text_atom_xml_charset_overrides_encoding.xml +8 -0
- data/tests/wellformed/encoding/http_text_atom_xml_default.xml +8 -0
- data/tests/wellformed/encoding/http_text_atom_xml_encoding.xml +8 -0
- data/tests/wellformed/encoding/http_text_rss_xml_charset.xml +8 -0
- data/tests/wellformed/encoding/http_text_rss_xml_charset_overrides_encoding.xml +8 -0
- data/tests/wellformed/encoding/http_text_rss_xml_default.xml +8 -0
- data/tests/wellformed/encoding/http_text_rss_xml_encoding.xml +8 -0
- data/tests/wellformed/encoding/http_text_xml_bogus_charset.xml +8 -0
- data/tests/wellformed/encoding/http_text_xml_bogus_param.xml +8 -0
- data/tests/wellformed/encoding/http_text_xml_charset.xml +8 -0
- data/tests/wellformed/encoding/http_text_xml_charset_2.xml +16 -0
- data/tests/wellformed/encoding/http_text_xml_charset_overrides_encoding.xml +8 -0
- data/tests/wellformed/encoding/http_text_xml_charset_overrides_encoding_2.xml +17 -0
- data/tests/wellformed/encoding/http_text_xml_default.xml +8 -0
- data/tests/wellformed/encoding/http_text_xml_epe_charset.xml +8 -0
- data/tests/wellformed/encoding/http_text_xml_epe_charset_overrides_encoding.xml +8 -0
- data/tests/wellformed/encoding/http_text_xml_epe_default.xml +8 -0
- data/tests/wellformed/encoding/http_text_xml_epe_encoding.xml +8 -0
- data/tests/wellformed/encoding/http_text_xml_qs.xml +8 -0
- data/tests/wellformed/encoding/iso-10646-ucs-2.xml +0 -0
- data/tests/wellformed/encoding/iso-10646-ucs-4.xml +0 -0
- data/tests/wellformed/encoding/no_content_type_default.xml +7 -0
- data/tests/wellformed/encoding/no_content_type_encoding.xml +7 -0
- data/tests/wellformed/encoding/u16.xml +0 -0
- data/tests/wellformed/encoding/ucs-2.xml +0 -0
- data/tests/wellformed/encoding/ucs-4.xml +0 -0
- data/tests/wellformed/encoding/utf-16be-autodetect.xml +0 -0
- data/tests/wellformed/encoding/utf-16be-bom.xml +0 -0
- data/tests/wellformed/encoding/utf-16be.xml +0 -0
- data/tests/wellformed/encoding/utf-16le-autodetect.xml +0 -0
- data/tests/wellformed/encoding/utf-16le-bom.xml +0 -0
- data/tests/wellformed/encoding/utf-16le.xml +0 -0
- data/tests/wellformed/encoding/utf-32be-autodetect.xml +0 -0
- data/tests/wellformed/encoding/utf-32be-bom.xml +0 -0
- data/tests/wellformed/encoding/utf-32be.xml +0 -0
- data/tests/wellformed/encoding/utf-32le-autodetect.xml +0 -0
- data/tests/wellformed/encoding/utf-32le-bom.xml +0 -0
- data/tests/wellformed/encoding/utf-32le.xml +0 -0
- data/tests/wellformed/encoding/utf-8-bom.xml +8 -0
- data/tests/wellformed/encoding/utf16.xml +0 -0
- data/tests/wellformed/encoding/utf_16.xml +0 -0
- data/tests/wellformed/encoding/utf_32.xml +0 -0
- data/tests/wellformed/encoding/x80_437.xml +9 -0
- data/tests/wellformed/encoding/x80_850.xml +9 -0
- data/tests/wellformed/encoding/x80_852.xml +9 -0
- data/tests/wellformed/encoding/x80_855.xml +9 -0
- data/tests/wellformed/encoding/x80_857.xml +9 -0
- data/tests/wellformed/encoding/x80_860.xml +9 -0
- data/tests/wellformed/encoding/x80_861.xml +9 -0
- data/tests/wellformed/encoding/x80_862.xml +9 -0
- data/tests/wellformed/encoding/x80_863.xml +9 -0
- data/tests/wellformed/encoding/x80_865.xml +9 -0
- data/tests/wellformed/encoding/x80_866.xml +9 -0
- data/tests/wellformed/encoding/x80_cp037.xml +1 -0
- data/tests/wellformed/encoding/x80_cp1125.xml +9 -0
- data/tests/wellformed/encoding/x80_cp1250.xml +9 -0
- data/tests/wellformed/encoding/x80_cp1251.xml +9 -0
- data/tests/wellformed/encoding/x80_cp1252.xml +9 -0
- data/tests/wellformed/encoding/x80_cp1253.xml +9 -0
- data/tests/wellformed/encoding/x80_cp1254.xml +9 -0
- data/tests/wellformed/encoding/x80_cp1255.xml +9 -0
- data/tests/wellformed/encoding/x80_cp1256.xml +9 -0
- data/tests/wellformed/encoding/x80_cp1257.xml +9 -0
- data/tests/wellformed/encoding/x80_cp1258.xml +9 -0
- data/tests/wellformed/encoding/x80_cp437.xml +9 -0
- data/tests/wellformed/encoding/x80_cp500.xml +1 -0
- data/tests/wellformed/encoding/x80_cp737.xml +9 -0
- data/tests/wellformed/encoding/x80_cp775.xml +9 -0
- data/tests/wellformed/encoding/x80_cp850.xml +9 -0
- data/tests/wellformed/encoding/x80_cp852.xml +9 -0
- data/tests/wellformed/encoding/x80_cp855.xml +9 -0
- data/tests/wellformed/encoding/x80_cp856.xml +9 -0
- data/tests/wellformed/encoding/x80_cp857.xml +9 -0
- data/tests/wellformed/encoding/x80_cp860.xml +9 -0
- data/tests/wellformed/encoding/x80_cp861.xml +9 -0
- data/tests/wellformed/encoding/x80_cp862.xml +9 -0
- data/tests/wellformed/encoding/x80_cp863.xml +9 -0
- data/tests/wellformed/encoding/x80_cp864.xml +9 -0
- data/tests/wellformed/encoding/x80_cp865.xml +9 -0
- data/tests/wellformed/encoding/x80_cp866.xml +9 -0
- data/tests/wellformed/encoding/x80_cp874.xml +9 -0
- data/tests/wellformed/encoding/x80_cp875.xml +1 -0
- data/tests/wellformed/encoding/x80_cp_is.xml +9 -0
- data/tests/wellformed/encoding/x80_csibm037.xml +1 -0
- data/tests/wellformed/encoding/x80_csibm500.xml +1 -0
- data/tests/wellformed/encoding/x80_csibm855.xml +9 -0
- data/tests/wellformed/encoding/x80_csibm857.xml +9 -0
- data/tests/wellformed/encoding/x80_csibm860.xml +9 -0
- data/tests/wellformed/encoding/x80_csibm861.xml +9 -0
- data/tests/wellformed/encoding/x80_csibm863.xml +9 -0
- data/tests/wellformed/encoding/x80_csibm864.xml +9 -0
- data/tests/wellformed/encoding/x80_csibm865.xml +9 -0
- data/tests/wellformed/encoding/x80_csibm866.xml +9 -0
- data/tests/wellformed/encoding/x80_cskoi8r.xml +9 -0
- data/tests/wellformed/encoding/x80_csmacintosh.xml +9 -0
- data/tests/wellformed/encoding/x80_cspc775baltic.xml +9 -0
- data/tests/wellformed/encoding/x80_cspc850multilingual.xml +9 -0
- data/tests/wellformed/encoding/x80_cspc862latinhebrew.xml +9 -0
- data/tests/wellformed/encoding/x80_cspc8codepage437.xml +9 -0
- data/tests/wellformed/encoding/x80_cspcp852.xml +9 -0
- data/tests/wellformed/encoding/x80_dbcs.xml +9 -0
- data/tests/wellformed/encoding/x80_ebcdic-cp-be.xml +1 -0
- data/tests/wellformed/encoding/x80_ebcdic-cp-ca.xml +1 -0
- data/tests/wellformed/encoding/x80_ebcdic-cp-ch.xml +1 -0
- data/tests/wellformed/encoding/x80_ebcdic-cp-nl.xml +1 -0
- data/tests/wellformed/encoding/x80_ebcdic-cp-us.xml +1 -0
- data/tests/wellformed/encoding/x80_ebcdic-cp-wt.xml +1 -0
- data/tests/wellformed/encoding/x80_ebcdic_cp_be.xml +1 -0
- data/tests/wellformed/encoding/x80_ebcdic_cp_ca.xml +1 -0
- data/tests/wellformed/encoding/x80_ebcdic_cp_ch.xml +1 -0
- data/tests/wellformed/encoding/x80_ebcdic_cp_nl.xml +1 -0
- data/tests/wellformed/encoding/x80_ebcdic_cp_us.xml +1 -0
- data/tests/wellformed/encoding/x80_ebcdic_cp_wt.xml +1 -0
- data/tests/wellformed/encoding/x80_ibm037.xml +1 -0
- data/tests/wellformed/encoding/x80_ibm039.xml +1 -0
- data/tests/wellformed/encoding/x80_ibm1140.xml +1 -0
- data/tests/wellformed/encoding/x80_ibm437.xml +9 -0
- data/tests/wellformed/encoding/x80_ibm500.xml +1 -0
- data/tests/wellformed/encoding/x80_ibm775.xml +9 -0
- data/tests/wellformed/encoding/x80_ibm850.xml +9 -0
- data/tests/wellformed/encoding/x80_ibm852.xml +9 -0
- data/tests/wellformed/encoding/x80_ibm855.xml +9 -0
- data/tests/wellformed/encoding/x80_ibm857.xml +9 -0
- data/tests/wellformed/encoding/x80_ibm860.xml +9 -0
- data/tests/wellformed/encoding/x80_ibm861.xml +9 -0
- data/tests/wellformed/encoding/x80_ibm862.xml +9 -0
- data/tests/wellformed/encoding/x80_ibm863.xml +9 -0
- data/tests/wellformed/encoding/x80_ibm864.xml +9 -0
- data/tests/wellformed/encoding/x80_ibm865.xml +9 -0
- data/tests/wellformed/encoding/x80_ibm866.xml +9 -0
- data/tests/wellformed/encoding/x80_koi8-r.xml +9 -0
- data/tests/wellformed/encoding/x80_koi8-t.xml +9 -0
- data/tests/wellformed/encoding/x80_koi8-u.xml +9 -0
- data/tests/wellformed/encoding/x80_mac-cyrillic.xml +9 -0
- data/tests/wellformed/encoding/x80_mac.xml +9 -0
- data/tests/wellformed/encoding/x80_maccentraleurope.xml +9 -0
- data/tests/wellformed/encoding/x80_maccyrillic.xml +9 -0
- data/tests/wellformed/encoding/x80_macgreek.xml +9 -0
- data/tests/wellformed/encoding/x80_maciceland.xml +9 -0
- data/tests/wellformed/encoding/x80_macintosh.xml +9 -0
- data/tests/wellformed/encoding/x80_maclatin2.xml +9 -0
- data/tests/wellformed/encoding/x80_macroman.xml +9 -0
- data/tests/wellformed/encoding/x80_macturkish.xml +9 -0
- data/tests/wellformed/encoding/x80_ms-ansi.xml +9 -0
- data/tests/wellformed/encoding/x80_ms-arab.xml +9 -0
- data/tests/wellformed/encoding/x80_ms-cyrl.xml +9 -0
- data/tests/wellformed/encoding/x80_ms-ee.xml +9 -0
- data/tests/wellformed/encoding/x80_ms-greek.xml +9 -0
- data/tests/wellformed/encoding/x80_ms-hebr.xml +9 -0
- data/tests/wellformed/encoding/x80_ms-turk.xml +9 -0
- data/tests/wellformed/encoding/x80_tcvn-5712.xml +9 -0
- data/tests/wellformed/encoding/x80_tcvn.xml +9 -0
- data/tests/wellformed/encoding/x80_tcvn5712-1.xml +9 -0
- data/tests/wellformed/encoding/x80_viscii.xml +9 -0
- data/tests/wellformed/encoding/x80_winbaltrim.xml +9 -0
- data/tests/wellformed/encoding/x80_windows-1250.xml +9 -0
- data/tests/wellformed/encoding/x80_windows-1251.xml +9 -0
- data/tests/wellformed/encoding/x80_windows-1252.xml +9 -0
- data/tests/wellformed/encoding/x80_windows-1253.xml +9 -0
- data/tests/wellformed/encoding/x80_windows-1254.xml +9 -0
- data/tests/wellformed/encoding/x80_windows-1255.xml +9 -0
- data/tests/wellformed/encoding/x80_windows-1256.xml +9 -0
- data/tests/wellformed/encoding/x80_windows-1257.xml +9 -0
- data/tests/wellformed/encoding/x80_windows-1258.xml +9 -0
- data/tests/wellformed/encoding/x80_windows_1250.xml +9 -0
- data/tests/wellformed/encoding/x80_windows_1251.xml +9 -0
- data/tests/wellformed/encoding/x80_windows_1252.xml +9 -0
- data/tests/wellformed/encoding/x80_windows_1253.xml +9 -0
- data/tests/wellformed/encoding/x80_windows_1254.xml +9 -0
- data/tests/wellformed/encoding/x80_windows_1255.xml +9 -0
- data/tests/wellformed/encoding/x80_windows_1256.xml +9 -0
- data/tests/wellformed/encoding/x80_windows_1257.xml +9 -0
- data/tests/wellformed/encoding/x80_windows_1258.xml +9 -0
- data/tests/wellformed/feedburner/feedburner_browserfriendly.xml +9 -0
- data/tests/wellformed/http/headers_foo.xml +7 -0
- data/tests/wellformed/itunes/itunes_channel_block.xml +9 -0
- data/tests/wellformed/itunes/itunes_channel_block_false.xml +9 -0
- data/tests/wellformed/itunes/itunes_channel_block_no.xml +9 -0
- data/tests/wellformed/itunes/itunes_channel_block_true.xml +9 -0
- data/tests/wellformed/itunes/itunes_channel_block_uppercase.xml +9 -0
- data/tests/wellformed/itunes/itunes_channel_block_whitespace.xml +9 -0
- data/tests/wellformed/itunes/itunes_channel_category.xml +9 -0
- data/tests/wellformed/itunes/itunes_channel_category_nested.xml +11 -0
- data/tests/wellformed/itunes/itunes_channel_category_scheme.xml +9 -0
- data/tests/wellformed/itunes/itunes_channel_explicit.xml +9 -0
- data/tests/wellformed/itunes/itunes_channel_explicit_false.xml +9 -0
- data/tests/wellformed/itunes/itunes_channel_explicit_no.xml +9 -0
- data/tests/wellformed/itunes/itunes_channel_explicit_true.xml +9 -0
- data/tests/wellformed/itunes/itunes_channel_explicit_uppercase.xml +9 -0
- data/tests/wellformed/itunes/itunes_channel_explicit_whitespace.xml +9 -0
- data/tests/wellformed/itunes/itunes_channel_image.xml +9 -0
- data/tests/wellformed/itunes/itunes_channel_keywords.xml +9 -0
- data/tests/wellformed/itunes/itunes_channel_keywords_duplicate.xml +9 -0
- data/tests/wellformed/itunes/itunes_channel_keywords_duplicate_2.xml +10 -0
- data/tests/wellformed/itunes/itunes_channel_keywords_multiple.xml +9 -0
- data/tests/wellformed/itunes/itunes_channel_link_image.xml +9 -0
- data/tests/wellformed/itunes/itunes_channel_owner_email.xml +12 -0
- data/tests/wellformed/itunes/itunes_channel_owner_name.xml +12 -0
- data/tests/wellformed/itunes/itunes_channel_subtitle.xml +9 -0
- data/tests/wellformed/itunes/itunes_channel_summary.xml +9 -0
- data/tests/wellformed/itunes/itunes_core_element_uppercase.xml +9 -0
- data/tests/wellformed/itunes/itunes_enclosure_url_maps_id.xml +11 -0
- data/tests/wellformed/itunes/itunes_enclosure_url_maps_id_2.xml +12 -0
- data/tests/wellformed/itunes/itunes_item_author_map_author.xml +11 -0
- data/tests/wellformed/itunes/itunes_item_block.xml +11 -0
- data/tests/wellformed/itunes/itunes_item_block_false.xml +11 -0
- data/tests/wellformed/itunes/itunes_item_block_no.xml +11 -0
- data/tests/wellformed/itunes/itunes_item_block_true.xml +11 -0
- data/tests/wellformed/itunes/itunes_item_block_uppercase.xml +11 -0
- data/tests/wellformed/itunes/itunes_item_block_whitespace.xml +11 -0
- data/tests/wellformed/itunes/itunes_item_category.xml +11 -0
- data/tests/wellformed/itunes/itunes_item_category_nested.xml +13 -0
- data/tests/wellformed/itunes/itunes_item_category_scheme.xml +11 -0
- data/tests/wellformed/itunes/itunes_item_duration.xml +11 -0
- data/tests/wellformed/itunes/itunes_item_explicit.xml +11 -0
- data/tests/wellformed/itunes/itunes_item_explicit_false.xml +11 -0
- data/tests/wellformed/itunes/itunes_item_explicit_no.xml +11 -0
- data/tests/wellformed/itunes/itunes_item_explicit_true.xml +11 -0
- data/tests/wellformed/itunes/itunes_item_explicit_uppercase.xml +11 -0
- data/tests/wellformed/itunes/itunes_item_explicit_whitespace.xml +11 -0
- data/tests/wellformed/itunes/itunes_item_image.xml +11 -0
- data/tests/wellformed/itunes/itunes_item_link_image.xml +11 -0
- data/tests/wellformed/itunes/itunes_item_subtitle.xml +11 -0
- data/tests/wellformed/itunes/itunes_item_summary.xml +11 -0
- data/tests/wellformed/itunes/itunes_link_enclosure_maps_id.xml +9 -0
- data/tests/wellformed/itunes/itunes_link_enclosure_maps_id_2.xml +10 -0
- data/tests/wellformed/itunes/itunes_namespace.xml +9 -0
- data/tests/wellformed/itunes/itunes_namespace_example.xml +9 -0
- data/tests/wellformed/itunes/itunes_namespace_lowercase.xml +9 -0
- data/tests/wellformed/itunes/itunes_namespace_uppercase.xml +9 -0
- data/tests/wellformed/lang/channel_dc_language.xml +9 -0
- data/tests/wellformed/lang/channel_language.xml +9 -0
- data/tests/wellformed/lang/entry_content_xml_lang.xml +9 -0
- data/tests/wellformed/lang/entry_content_xml_lang_blank.xml +9 -0
- data/tests/wellformed/lang/entry_content_xml_lang_blank_2.xml +9 -0
- data/tests/wellformed/lang/entry_content_xml_lang_blank_3.xml +12 -0
- data/tests/wellformed/lang/entry_content_xml_lang_inherit.xml +9 -0
- data/tests/wellformed/lang/entry_content_xml_lang_inherit_2.xml +9 -0
- data/tests/wellformed/lang/entry_content_xml_lang_inherit_3.xml +10 -0
- data/tests/wellformed/lang/entry_content_xml_lang_inherit_4.xml +10 -0
- data/tests/wellformed/lang/entry_summary_xml_lang.xml +9 -0
- data/tests/wellformed/lang/entry_summary_xml_lang_blank.xml +9 -0
- data/tests/wellformed/lang/entry_summary_xml_lang_inherit.xml +9 -0
- data/tests/wellformed/lang/entry_summary_xml_lang_inherit_2.xml +9 -0
- data/tests/wellformed/lang/entry_summary_xml_lang_inherit_3.xml +10 -0
- data/tests/wellformed/lang/entry_summary_xml_lang_inherit_4.xml +10 -0
- data/tests/wellformed/lang/entry_title_xml_lang.xml +9 -0
- data/tests/wellformed/lang/entry_title_xml_lang_blank.xml +9 -0
- data/tests/wellformed/lang/entry_title_xml_lang_inherit.xml +9 -0
- data/tests/wellformed/lang/entry_title_xml_lang_inherit_2.xml +9 -0
- data/tests/wellformed/lang/entry_title_xml_lang_inherit_3.xml +10 -0
- data/tests/wellformed/lang/entry_title_xml_lang_inherit_4.xml +10 -0
- data/tests/wellformed/lang/feed_copyright_xml_lang.xml +7 -0
- data/tests/wellformed/lang/feed_copyright_xml_lang_blank.xml +7 -0
- data/tests/wellformed/lang/feed_copyright_xml_lang_inherit.xml +7 -0
- data/tests/wellformed/lang/feed_copyright_xml_lang_inherit_2.xml +7 -0
- data/tests/wellformed/lang/feed_copyright_xml_lang_inherit_3.xml +8 -0
- data/tests/wellformed/lang/feed_copyright_xml_lang_inherit_4.xml +8 -0
- data/tests/wellformed/lang/feed_info_xml_lang.xml +7 -0
- data/tests/wellformed/lang/feed_info_xml_lang_blank.xml +7 -0
- data/tests/wellformed/lang/feed_info_xml_lang_inherit.xml +7 -0
- data/tests/wellformed/lang/feed_info_xml_lang_inherit_2.xml +7 -0
- data/tests/wellformed/lang/feed_info_xml_lang_inherit_3.xml +8 -0
- data/tests/wellformed/lang/feed_info_xml_lang_inherit_4.xml +8 -0
- data/tests/wellformed/lang/feed_language.xml +9 -0
- data/tests/wellformed/lang/feed_language_override.xml +9 -0
- data/tests/wellformed/lang/feed_not_xml_lang.xml +7 -0
- data/tests/wellformed/lang/feed_not_xml_lang_2.xml +7 -0
- data/tests/wellformed/lang/feed_tagline_xml_lang.xml +7 -0
- data/tests/wellformed/lang/feed_tagline_xml_lang_blank.xml +7 -0
- data/tests/wellformed/lang/feed_tagline_xml_lang_inherit.xml +7 -0
- data/tests/wellformed/lang/feed_tagline_xml_lang_inherit_2.xml +7 -0
- data/tests/wellformed/lang/feed_tagline_xml_lang_inherit_3.xml +8 -0
- data/tests/wellformed/lang/feed_tagline_xml_lang_inherit_4.xml +8 -0
- data/tests/wellformed/lang/feed_title_xml_lang.xml +7 -0
- data/tests/wellformed/lang/feed_title_xml_lang_blank.xml +7 -0
- data/tests/wellformed/lang/feed_title_xml_lang_inherit.xml +7 -0
- data/tests/wellformed/lang/feed_title_xml_lang_inherit_2.xml +7 -0
- data/tests/wellformed/lang/feed_title_xml_lang_inherit_3.xml +8 -0
- data/tests/wellformed/lang/feed_title_xml_lang_inherit_4.xml +8 -0
- data/tests/wellformed/lang/feed_xml_lang.xml +6 -0
- data/tests/wellformed/lang/http_content_language.xml +7 -0
- data/tests/wellformed/lang/http_content_language_entry_title_inherit.xml +10 -0
- data/tests/wellformed/lang/http_content_language_entry_title_inherit_2.xml +11 -0
- data/tests/wellformed/lang/http_content_language_feed_language.xml +10 -0
- data/tests/wellformed/lang/http_content_language_feed_xml_lang.xml +7 -0
- data/tests/wellformed/lang/item_content_encoded_xml_lang.xml +11 -0
- data/tests/wellformed/lang/item_content_encoded_xml_lang_inherit.xml +11 -0
- data/tests/wellformed/lang/item_dc_language.xml +11 -0
- data/tests/wellformed/lang/item_fullitem_xml_lang.xml +11 -0
- data/tests/wellformed/lang/item_fullitem_xml_lang_inherit.xml +11 -0
- data/tests/wellformed/lang/item_xhtml_body_xml_lang.xml +13 -0
- data/tests/wellformed/lang/item_xhtml_body_xml_lang_inherit.xml +13 -0
- data/tests/wellformed/namespace/rss1.0withModules.xml +47 -0
- data/tests/wellformed/namespace/rss1.0withModulesNoDefNS.xml +48 -0
- data/tests/wellformed/namespace/rss1.0withModulesNoDefNSLocalNameClash.xml +53 -0
- data/tests/wellformed/namespace/rss2.0NSwithModules.xml +50 -0
- data/tests/wellformed/namespace/rss2.0NSwithModulesNoDefNS.xml +50 -0
- data/tests/wellformed/namespace/rss2.0NSwithModulesNoDefNSLocalNameClash.xml +58 -0
- data/tests/wellformed/namespace/rss2.0noNSwithModules.xml +49 -0
- data/tests/wellformed/namespace/rss2.0noNSwithModulesLocalNameClash.xml +57 -0
- data/tests/wellformed/rdf/doctype_contains_entity_decl.xml +17 -0
- data/tests/wellformed/rdf/rdf_channel_description.xml +9 -0
- data/tests/wellformed/rdf/rdf_channel_link.xml +9 -0
- data/tests/wellformed/rdf/rdf_channel_title.xml +9 -0
- data/tests/wellformed/rdf/rdf_item_description.xml +16 -0
- data/tests/wellformed/rdf/rdf_item_link.xml +16 -0
- data/tests/wellformed/rdf/rdf_item_rdf_about.xml +15 -0
- data/tests/wellformed/rdf/rdf_item_title.xml +16 -0
- data/tests/wellformed/rdf/rss090_channel_title.xml +12 -0
- data/tests/wellformed/rdf/rss090_item_title.xml +12 -0
- data/tests/wellformed/rdf/rss_version_10.xml +6 -0
- data/tests/wellformed/rdf/rss_version_10_not_default_ns.xml +8 -0
- data/tests/wellformed/rss/aaa_wellformed.xml +6 -0
- data/tests/wellformed/rss/channel_author.xml +9 -0
- data/tests/wellformed/rss/channel_author_map_author_detail_email.xml +9 -0
- data/tests/wellformed/rss/channel_author_map_author_detail_email_2.xml +9 -0
- data/tests/wellformed/rss/channel_author_map_author_detail_email_3.xml +9 -0
- data/tests/wellformed/rss/channel_author_map_author_detail_name.xml +9 -0
- data/tests/wellformed/rss/channel_author_map_author_detail_name_2.xml +9 -0
- data/tests/wellformed/rss/channel_category.xml +9 -0
- data/tests/wellformed/rss/channel_category_domain.xml +9 -0
- data/tests/wellformed/rss/channel_category_multiple.xml +10 -0
- data/tests/wellformed/rss/channel_category_multiple_2.xml +10 -0
- data/tests/wellformed/rss/channel_cloud_domain.xml +9 -0
- data/tests/wellformed/rss/channel_cloud_path.xml +9 -0
- data/tests/wellformed/rss/channel_cloud_port.xml +9 -0
- data/tests/wellformed/rss/channel_cloud_protocol.xml +9 -0
- data/tests/wellformed/rss/channel_cloud_registerProcedure.xml +9 -0
- data/tests/wellformed/rss/channel_copyright.xml +9 -0
- data/tests/wellformed/rss/channel_dc_author.xml +9 -0
- data/tests/wellformed/rss/channel_dc_author_map_author_detail_email.xml +9 -0
- data/tests/wellformed/rss/channel_dc_author_map_author_detail_name.xml +9 -0
- data/tests/wellformed/rss/channel_dc_contributor.xml +9 -0
- data/tests/wellformed/rss/channel_dc_creator.xml +9 -0
- data/tests/wellformed/rss/channel_dc_creator_map_author_detail_email.xml +9 -0
- data/tests/wellformed/rss/channel_dc_creator_map_author_detail_name.xml +9 -0
- data/tests/wellformed/rss/channel_dc_publisher.xml +9 -0
- data/tests/wellformed/rss/channel_dc_publisher_email.xml +9 -0
- data/tests/wellformed/rss/channel_dc_publisher_name.xml +9 -0
- data/tests/wellformed/rss/channel_dc_rights.xml +9 -0
- data/tests/wellformed/rss/channel_dc_subject.xml +9 -0
- data/tests/wellformed/rss/channel_dc_subject_2.xml +9 -0
- data/tests/wellformed/rss/channel_dc_subject_multiple.xml +10 -0
- data/tests/wellformed/rss/channel_dc_title.xml +9 -0
- data/tests/wellformed/rss/channel_description.xml +9 -0
- data/tests/wellformed/rss/channel_description_escaped_markup.xml +9 -0
- data/tests/wellformed/rss/channel_description_map_tagline.xml +9 -0
- data/tests/wellformed/rss/channel_description_naked_markup.xml +9 -0
- data/tests/wellformed/rss/channel_description_shorttag.xml +10 -0
- data/tests/wellformed/rss/channel_docs.xml +9 -0
- data/tests/wellformed/rss/channel_generator.xml +9 -0
- data/tests/wellformed/rss/channel_image_description.xml +16 -0
- data/tests/wellformed/rss/channel_image_height.xml +16 -0
- data/tests/wellformed/rss/channel_image_link.xml +16 -0
- data/tests/wellformed/rss/channel_image_link_conflict.xml +12 -0
- data/tests/wellformed/rss/channel_image_title.xml +16 -0
- data/tests/wellformed/rss/channel_image_title_conflict.xml +12 -0
- data/tests/wellformed/rss/channel_image_url.xml +16 -0
- data/tests/wellformed/rss/channel_image_width.xml +16 -0
- data/tests/wellformed/rss/channel_link.xml +9 -0
- data/tests/wellformed/rss/channel_managingEditor.xml +9 -0
- data/tests/wellformed/rss/channel_managingEditor_map_author_detail_email.xml +9 -0
- data/tests/wellformed/rss/channel_managingEditor_map_author_detail_name.xml +9 -0
- data/tests/wellformed/rss/channel_textInput_description.xml +14 -0
- data/tests/wellformed/rss/channel_textInput_description_conflict.xml +12 -0
- data/tests/wellformed/rss/channel_textInput_link.xml +12 -0
- data/tests/wellformed/rss/channel_textInput_link_conflict.xml +12 -0
- data/tests/wellformed/rss/channel_textInput_name.xml +11 -0
- data/tests/wellformed/rss/channel_textInput_title.xml +12 -0
- data/tests/wellformed/rss/channel_textInput_title_conflict.xml +12 -0
- data/tests/wellformed/rss/channel_title.xml +9 -0
- data/tests/wellformed/rss/channel_title_apos.xml +9 -0
- data/tests/wellformed/rss/channel_title_gt.xml +9 -0
- data/tests/wellformed/rss/channel_title_lt.xml +9 -0
- data/tests/wellformed/rss/channel_ttl.xml +9 -0
- data/tests/wellformed/rss/channel_webMaster.xml +9 -0
- data/tests/wellformed/rss/channel_webMaster_email.xml +9 -0
- data/tests/wellformed/rss/channel_webMaster_name.xml +9 -0
- data/tests/wellformed/rss/item_author.xml +11 -0
- data/tests/wellformed/rss/item_author_map_author_detail_email.xml +11 -0
- data/tests/wellformed/rss/item_author_map_author_detail_name.xml +11 -0
- data/tests/wellformed/rss/item_category.xml +11 -0
- data/tests/wellformed/rss/item_category_domain.xml +11 -0
- data/tests/wellformed/rss/item_category_multiple.xml +12 -0
- data/tests/wellformed/rss/item_category_multiple_2.xml +12 -0
- data/tests/wellformed/rss/item_comments.xml +11 -0
- data/tests/wellformed/rss/item_content_encoded.xml +11 -0
- data/tests/wellformed/rss/item_content_encoded_mode.xml +11 -0
- data/tests/wellformed/rss/item_content_encoded_type.xml +11 -0
- data/tests/wellformed/rss/item_dc_author.xml +11 -0
- data/tests/wellformed/rss/item_dc_author_map_author_detail_email.xml +11 -0
- data/tests/wellformed/rss/item_dc_author_map_author_detail_name.xml +11 -0
- data/tests/wellformed/rss/item_dc_contributor.xml +11 -0
- data/tests/wellformed/rss/item_dc_creator.xml +11 -0
- data/tests/wellformed/rss/item_dc_creator_map_author_detail_email.xml +11 -0
- data/tests/wellformed/rss/item_dc_creator_map_author_detail_name.xml +11 -0
- data/tests/wellformed/rss/item_dc_publisher.xml +11 -0
- data/tests/wellformed/rss/item_dc_publisher_email.xml +11 -0
- data/tests/wellformed/rss/item_dc_publisher_name.xml +11 -0
- data/tests/wellformed/rss/item_dc_rights.xml +11 -0
- data/tests/wellformed/rss/item_dc_subject.xml +11 -0
- data/tests/wellformed/rss/item_dc_subject_2.xml +11 -0
- data/tests/wellformed/rss/item_dc_subject_multiple.xml +12 -0
- data/tests/wellformed/rss/item_dc_title.xml +11 -0
- data/tests/wellformed/rss/item_description.xml +11 -0
- data/tests/wellformed/rss/item_description_and_summary.xml +12 -0
- data/tests/wellformed/rss/item_description_br.xml +11 -0
- data/tests/wellformed/rss/item_description_br_shorttag.xml +12 -0
- data/tests/wellformed/rss/item_description_escaped_markup.xml +11 -0
- data/tests/wellformed/rss/item_description_map_summary.xml +11 -0
- data/tests/wellformed/rss/item_description_naked_markup.xml +11 -0
- data/tests/wellformed/rss/item_description_not_a_doctype.xml +9 -0
- data/tests/wellformed/rss/item_enclosure_length.xml +12 -0
- data/tests/wellformed/rss/item_enclosure_multiple.xml +13 -0
- data/tests/wellformed/rss/item_enclosure_type.xml +12 -0
- data/tests/wellformed/rss/item_enclosure_url.xml +12 -0
- data/tests/wellformed/rss/item_fullitem.xml +11 -0
- data/tests/wellformed/rss/item_fullitem_mode.xml +11 -0
- data/tests/wellformed/rss/item_fullitem_type.xml +11 -0
- data/tests/wellformed/rss/item_guid.xml +11 -0
- data/tests/wellformed/rss/item_guid_conflict_link.xml +12 -0
- data/tests/wellformed/rss/item_guid_guidislink.xml +11 -0
- data/tests/wellformed/rss/item_guid_isPermaLink_conflict_link.xml +12 -0
- data/tests/wellformed/rss/item_guid_isPermaLink_conflict_link_not_guidislink.xml +12 -0
- data/tests/wellformed/rss/item_guid_isPermaLink_guidislink.xml +11 -0
- data/tests/wellformed/rss/item_guid_isPermaLink_map_link.xml +11 -0
- data/tests/wellformed/rss/item_guid_map_link.xml +11 -0
- data/tests/wellformed/rss/item_guid_not_permalink.xml +11 -0
- data/tests/wellformed/rss/item_guid_not_permalink_conflict_link.xml +12 -0
- data/tests/wellformed/rss/item_guid_not_permalink_not_guidislink.xml +11 -0
- data/tests/wellformed/rss/item_guid_not_permalink_not_guidislink_2.xml +12 -0
- data/tests/wellformed/rss/item_link.xml +11 -0
- data/tests/wellformed/rss/item_source.xml +12 -0
- data/tests/wellformed/rss/item_source_url.xml +12 -0
- data/tests/wellformed/rss/item_summary_and_description.xml +12 -0
- data/tests/wellformed/rss/item_title.xml +11 -0
- data/tests/wellformed/rss/item_xhtml_body.xml +13 -0
- data/tests/wellformed/rss/item_xhtml_body_mode.xml +13 -0
- data/tests/wellformed/rss/item_xhtml_body_type.xml +13 -0
- data/tests/wellformed/rss/rss_namespace_1.xml +9 -0
- data/tests/wellformed/rss/rss_namespace_2.xml +9 -0
- data/tests/wellformed/rss/rss_namespace_3.xml +9 -0
- data/tests/wellformed/rss/rss_namespace_4.xml +9 -0
- data/tests/wellformed/rss/rss_version_090.xml +6 -0
- data/tests/wellformed/rss/rss_version_091_netscape.xml +7 -0
- data/tests/wellformed/rss/rss_version_091_userland.xml +6 -0
- data/tests/wellformed/rss/rss_version_092.xml +6 -0
- data/tests/wellformed/rss/rss_version_093.xml +6 -0
- data/tests/wellformed/rss/rss_version_094.xml +6 -0
- data/tests/wellformed/rss/rss_version_20.xml +6 -0
- data/tests/wellformed/rss/rss_version_201.xml +6 -0
- data/tests/wellformed/rss/rss_version_21.xml +6 -0
- data/tests/wellformed/rss/rss_version_missing.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_applet.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_blink.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_crazy.xml +75 -0
- data/tests/wellformed/sanitize/entry_content_embed.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_frame.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_iframe.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_link.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_meta.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_object.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_onabort.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_onblur.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_onchange.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_onclick.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_ondblclick.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_onerror.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_onfocus.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_onkeydown.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_onkeypress.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_onkeyup.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_onload.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_onmousedown.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_onmouseout.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_onmouseover.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_onmouseup.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_onreset.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_onresize.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_onsubmit.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_onunload.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_script.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_script_base64.xml +12 -0
- data/tests/wellformed/sanitize/entry_content_script_cdata.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_script_inline.xml +9 -0
- data/tests/wellformed/sanitize/entry_content_style.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_applet.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_blink.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_crazy.xml +75 -0
- data/tests/wellformed/sanitize/entry_summary_embed.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_frame.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_iframe.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_link.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_meta.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_object.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_onabort.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_onblur.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_onchange.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_onclick.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_ondblclick.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_onerror.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_onfocus.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_onkeydown.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_onkeypress.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_onkeyup.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_onload.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_onmousedown.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_onmouseout.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_onmouseover.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_onmouseup.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_onreset.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_onresize.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_onsubmit.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_onunload.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_script.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_script_base64.xml +12 -0
- data/tests/wellformed/sanitize/entry_summary_script_cdata.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_script_inline.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_script_map_description.xml +9 -0
- data/tests/wellformed/sanitize/entry_summary_style.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_applet.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_blink.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_crazy.xml +75 -0
- data/tests/wellformed/sanitize/entry_title_embed.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_frame.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_iframe.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_link.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_meta.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_object.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_onabort.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_onblur.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_onchange.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_onclick.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_ondblclick.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_onerror.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_onfocus.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_onkeydown.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_onkeypress.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_onkeyup.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_onload.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_onmousedown.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_onmouseout.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_onmouseover.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_onmouseup.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_onreset.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_onresize.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_onsubmit.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_onunload.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_script.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_script_cdata.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_script_inline.xml +9 -0
- data/tests/wellformed/sanitize/entry_title_style.xml +9 -0
- data/tests/wellformed/sanitize/feed_copyright_applet.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_blink.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_crazy.xml +73 -0
- data/tests/wellformed/sanitize/feed_copyright_embed.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_frame.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_iframe.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_link.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_meta.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_object.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_onabort.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_onblur.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_onchange.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_onclick.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_ondblclick.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_onerror.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_onfocus.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_onkeydown.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_onkeypress.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_onkeyup.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_onload.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_onmousedown.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_onmouseout.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_onmouseover.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_onmouseup.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_onreset.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_onresize.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_onsubmit.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_onunload.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_script.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_script_cdata.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_script_inline.xml +7 -0
- data/tests/wellformed/sanitize/feed_copyright_style.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_applet.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_blink.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_crazy.xml +73 -0
- data/tests/wellformed/sanitize/feed_info_embed.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_frame.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_iframe.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_link.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_meta.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_object.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_onabort.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_onblur.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_onchange.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_onclick.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_ondblclick.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_onerror.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_onfocus.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_onkeydown.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_onkeypress.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_onkeyup.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_onload.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_onmousedown.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_onmouseout.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_onmouseover.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_onmouseup.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_onreset.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_onresize.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_onsubmit.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_onunload.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_script.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_script_cdata.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_script_inline.xml +7 -0
- data/tests/wellformed/sanitize/feed_info_style.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_applet.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_blink.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_crazy.xml +73 -0
- data/tests/wellformed/sanitize/feed_subtitle_embed.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_frame.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_iframe.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_link.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_meta.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_object.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_onabort.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_onblur.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_onchange.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_onclick.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_ondblclick.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_onerror.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_onfocus.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_onkeydown.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_onkeypress.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_onkeyup.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_onload.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_onmousedown.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_onmouseout.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_onmouseover.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_onmouseup.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_onreset.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_onresize.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_onsubmit.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_onunload.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_script.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_script_cdata.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_script_inline.xml +7 -0
- data/tests/wellformed/sanitize/feed_subtitle_style.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_applet.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_blink.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_crazy.xml +73 -0
- data/tests/wellformed/sanitize/feed_tagline_embed.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_frame.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_iframe.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_link.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_meta.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_object.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_onabort.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_onblur.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_onchange.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_onclick.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_ondblclick.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_onerror.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_onfocus.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_onkeydown.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_onkeypress.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_onkeyup.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_onload.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_onmousedown.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_onmouseout.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_onmouseover.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_onmouseup.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_onreset.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_onresize.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_onsubmit.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_onunload.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_script.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_script_cdata.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_script_inline.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_script_map_description.xml +7 -0
- data/tests/wellformed/sanitize/feed_tagline_style.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_applet.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_blink.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_crazy.xml +73 -0
- data/tests/wellformed/sanitize/feed_title_embed.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_frame.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_iframe.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_link.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_meta.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_object.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_onabort.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_onblur.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_onchange.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_onclick.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_ondblclick.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_onerror.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_onfocus.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_onkeydown.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_onkeypress.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_onkeyup.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_onload.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_onmousedown.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_onmouseout.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_onmouseover.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_onmouseup.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_onreset.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_onresize.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_onsubmit.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_onunload.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_script.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_script_cdata.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_script_inline.xml +7 -0
- data/tests/wellformed/sanitize/feed_title_style.xml +7 -0
- data/tests/wellformed/sanitize/item_body_applet.xml +11 -0
- data/tests/wellformed/sanitize/item_body_blink.xml +11 -0
- data/tests/wellformed/sanitize/item_body_embed.xml +11 -0
- data/tests/wellformed/sanitize/item_body_frame.xml +11 -0
- data/tests/wellformed/sanitize/item_body_iframe.xml +11 -0
- data/tests/wellformed/sanitize/item_body_link.xml +11 -0
- data/tests/wellformed/sanitize/item_body_meta.xml +11 -0
- data/tests/wellformed/sanitize/item_body_object.xml +11 -0
- data/tests/wellformed/sanitize/item_body_onabort.xml +11 -0
- data/tests/wellformed/sanitize/item_body_onblur.xml +11 -0
- data/tests/wellformed/sanitize/item_body_onchange.xml +11 -0
- data/tests/wellformed/sanitize/item_body_onclick.xml +11 -0
- data/tests/wellformed/sanitize/item_body_ondblclick.xml +11 -0
- data/tests/wellformed/sanitize/item_body_onerror.xml +11 -0
- data/tests/wellformed/sanitize/item_body_onfocus.xml +11 -0
- data/tests/wellformed/sanitize/item_body_onkeydown.xml +11 -0
- data/tests/wellformed/sanitize/item_body_onkeypress.xml +11 -0
- data/tests/wellformed/sanitize/item_body_onkeyup.xml +11 -0
- data/tests/wellformed/sanitize/item_body_onload.xml +11 -0
- data/tests/wellformed/sanitize/item_body_onmousedown.xml +11 -0
- data/tests/wellformed/sanitize/item_body_onmouseout.xml +11 -0
- data/tests/wellformed/sanitize/item_body_onmouseover.xml +11 -0
- data/tests/wellformed/sanitize/item_body_onmouseup.xml +11 -0
- data/tests/wellformed/sanitize/item_body_onreset.xml +11 -0
- data/tests/wellformed/sanitize/item_body_onresize.xml +11 -0
- data/tests/wellformed/sanitize/item_body_onsubmit.xml +11 -0
- data/tests/wellformed/sanitize/item_body_onunload.xml +11 -0
- data/tests/wellformed/sanitize/item_body_script.xml +11 -0
- data/tests/wellformed/sanitize/item_body_script_map_content.xml +11 -0
- data/tests/wellformed/sanitize/item_body_style.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_applet.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_blink.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_crazy.xml +77 -0
- data/tests/wellformed/sanitize/item_content_encoded_embed.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_frame.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_iframe.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_link.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_map_content.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_meta.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_object.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_onabort.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_onblur.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_onchange.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_onclick.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_ondblclick.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_onerror.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_onfocus.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_onkeydown.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_onkeypress.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_onkeyup.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_onload.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_onmousedown.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_onmouseout.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_onmouseover.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_onmouseup.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_onreset.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_onresize.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_onsubmit.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_onunload.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_script.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_script_cdata.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_script_map_content.xml +11 -0
- data/tests/wellformed/sanitize/item_content_encoded_style.xml +11 -0
- data/tests/wellformed/sanitize/item_description_applet.xml +11 -0
- data/tests/wellformed/sanitize/item_description_blink.xml +11 -0
- data/tests/wellformed/sanitize/item_description_crazy.xml +81 -0
- data/tests/wellformed/sanitize/item_description_embed.xml +11 -0
- data/tests/wellformed/sanitize/item_description_frame.xml +11 -0
- data/tests/wellformed/sanitize/item_description_iframe.xml +11 -0
- data/tests/wellformed/sanitize/item_description_link.xml +11 -0
- data/tests/wellformed/sanitize/item_description_meta.xml +11 -0
- data/tests/wellformed/sanitize/item_description_object.xml +11 -0
- data/tests/wellformed/sanitize/item_description_onabort.xml +11 -0
- data/tests/wellformed/sanitize/item_description_onblur.xml +11 -0
- data/tests/wellformed/sanitize/item_description_onchange.xml +11 -0
- data/tests/wellformed/sanitize/item_description_onclick.xml +11 -0
- data/tests/wellformed/sanitize/item_description_ondblclick.xml +11 -0
- data/tests/wellformed/sanitize/item_description_onerror.xml +11 -0
- data/tests/wellformed/sanitize/item_description_onfocus.xml +11 -0
- data/tests/wellformed/sanitize/item_description_onkeydown.xml +11 -0
- data/tests/wellformed/sanitize/item_description_onkeypress.xml +11 -0
- data/tests/wellformed/sanitize/item_description_onkeyup.xml +11 -0
- data/tests/wellformed/sanitize/item_description_onload.xml +11 -0
- data/tests/wellformed/sanitize/item_description_onmousedown.xml +11 -0
- data/tests/wellformed/sanitize/item_description_onmouseout.xml +11 -0
- data/tests/wellformed/sanitize/item_description_onmouseover.xml +11 -0
- data/tests/wellformed/sanitize/item_description_onmouseup.xml +11 -0
- data/tests/wellformed/sanitize/item_description_onreset.xml +11 -0
- data/tests/wellformed/sanitize/item_description_onresize.xml +11 -0
- data/tests/wellformed/sanitize/item_description_onsubmit.xml +11 -0
- data/tests/wellformed/sanitize/item_description_onunload.xml +11 -0
- data/tests/wellformed/sanitize/item_description_script.xml +11 -0
- data/tests/wellformed/sanitize/item_description_script_cdata.xml +11 -0
- data/tests/wellformed/sanitize/item_description_script_map_summary.xml +11 -0
- data/tests/wellformed/sanitize/item_description_style.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_applet.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_blink.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_crazy.xml +77 -0
- data/tests/wellformed/sanitize/item_fullitem_embed.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_frame.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_iframe.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_link.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_meta.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_object.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_onabort.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_onblur.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_onchange.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_onclick.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_ondblclick.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_onerror.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_onfocus.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_onkeydown.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_onkeypress.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_onkeyup.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_onload.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_onmousedown.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_onmouseout.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_onmouseover.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_onmouseup.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_onreset.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_onresize.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_onsubmit.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_onunload.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_script.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_script_cdata.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_script_map_summary.xml +11 -0
- data/tests/wellformed/sanitize/item_fullitem_style.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_applet.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_blink.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_embed.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_frame.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_iframe.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_link.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_meta.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_object.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_onabort.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_onblur.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_onchange.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_onclick.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_ondblclick.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_onerror.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_onfocus.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_onkeydown.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_onkeypress.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_onkeyup.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_onload.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_onmousedown.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_onmouseout.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_onmouseover.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_onmouseup.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_onreset.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_onresize.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_onsubmit.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_onunload.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_script.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_script_map_content.xml +11 -0
- data/tests/wellformed/sanitize/item_xhtml_body_style.xml +11 -0
- metadata +3472 -0
data/LICENSE
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
I include this license in good faith effort, and it should be considered the license for the code herein.
|
2
|
+
- Jeff Hodges < jeff at somethingsimilar.com >
|
3
|
+
--
|
4
|
+
Universal Feed Parser (feedparser.py), its testing harness (feedparsertest.py),
|
5
|
+
and its unit tests (everything in the tests/ directory) are released under the
|
6
|
+
following license:
|
7
|
+
|
8
|
+
----- begin license block -----
|
9
|
+
|
10
|
+
Copyright (c) 2002-2005, Mark Pilgrim
|
11
|
+
All rights reserved.
|
12
|
+
|
13
|
+
Redistribution and use in source and binary forms, with or without modification,
|
14
|
+
are permitted provided that the following conditions are met:
|
15
|
+
|
16
|
+
* Redistributions of source code must retain the above copyright notice,
|
17
|
+
this list of conditions and the following disclaimer.
|
18
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
19
|
+
this list of conditions and the following disclaimer in the documentation
|
20
|
+
and/or other materials provided with the distribution.
|
21
|
+
|
22
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
23
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
24
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
25
|
+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
26
|
+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
27
|
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
28
|
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
29
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
30
|
+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
31
|
+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
32
|
+
POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
|
34
|
+
----- end license block -----
|
35
|
+
|
36
|
+
|
37
|
+
|
38
|
+
|
39
|
+
|
40
|
+
Universal Feed Parser documentation (everything in the docs/ directory) is
|
41
|
+
released under the following license:
|
42
|
+
|
43
|
+
----- begin license block -----
|
44
|
+
|
45
|
+
Copyright 2004-2005 Mark Pilgrim. All rights reserved.
|
46
|
+
|
47
|
+
Redistribution and use in source (XML DocBook) and "compiled" forms (SGML,
|
48
|
+
HTML, PDF, PostScript, RTF and so forth) with or without modification, are
|
49
|
+
permitted provided that the following conditions are met:
|
50
|
+
|
51
|
+
* Redistributions of source code (XML DocBook) must retain the above copyright
|
52
|
+
notice, this list of conditions and the following disclaimer.
|
53
|
+
* Redistributions in compiled form (transformed to other DTDs, converted to
|
54
|
+
PDF, PostScript, RTF and other formats) must reproduce the above copyright
|
55
|
+
notice, this list of conditions and the following disclaimer in the
|
56
|
+
documentation and/or other materials provided with the distribution.
|
57
|
+
|
58
|
+
THIS DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
59
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
60
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
61
|
+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
62
|
+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
63
|
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
64
|
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
65
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
66
|
+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
67
|
+
ARISING IN ANY WAY OUT OF THE USE OF THIS DOCUMENTATION, EVEN IF ADVISED OF THE
|
68
|
+
POSSIBILITY OF SUCH DAMAGE.
|
data/README
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
Universal Feed Parser
|
2
|
+
Parse RSS and Atom feeds in Ruby (a port of the Python Universal Feed Parser). 3000 unit tests. Open source.
|
3
|
+
|
4
|
+
Copyright (c) 2002-5 by Mark Pilgrim
|
5
|
+
open source, see LICENSE file for details
|
6
|
+
|
7
|
+
-----
|
8
|
+
|
9
|
+
To use:
|
10
|
+
If installed as a gem
|
11
|
+
require 'rubygems'
|
12
|
+
gem 'rfeedparser'
|
13
|
+
require 'feedparser'
|
14
|
+
|
15
|
+
fp = FeedParser.parse("some-feed-filepath-or-url")
|
16
|
+
|
17
|
+
If not installed as a gem, copy the contents of lib into the ruby path and just use
|
18
|
+
require 'feedparser'
|
19
|
+
|
20
|
+
fp = FeedParser.parse("some-feed-filepath-or-url")
|
21
|
+
|
22
|
+
----
|
23
|
+
|
24
|
+
For developers:
|
25
|
+
I currently have the "rough" code in a bzr branch over at
|
26
|
+
<http://somethingsimilar.com/code/bzr/rfeedparser/>. You'll want
|
27
|
+
to check out the rfeedparser-main branch for the current code, or
|
28
|
+
rfeedparser-release for the code in the latest release.
|
data/RUBY-TESTING
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
=== Testing rFeedParser ===
|
2
|
+
Simply run `ruby feedparsertest.rb` to run all of the FeedParser tests.
|
3
|
+
Optionally, you can start up feedparserserver.rb and run feedparser.rb
|
4
|
+
against "http://localhost:8097/tests/path/to/testcase.xml" if you want
|
5
|
+
to try a test individually. I'll probably merge feedparserserver.rb into
|
6
|
+
feedparsertest.rb soon.
|
7
|
+
|
8
|
+
=== Last Count 20070321 ===
|
9
|
+
By my last count, feedparsertest.rb says that there are 45 assertions
|
10
|
+
that fail, and 4 that error out. I've included here a few tests that
|
11
|
+
"Failed, Sort Of". By that I mean, the behaviors the tests are meant to
|
12
|
+
check are correct, but the test fails because of some other superficial
|
13
|
+
or unrelated behavior.
|
14
|
+
|
15
|
+
=== Tests Failed, Sort Of ===
|
16
|
+
|
17
|
+
Problem:
|
18
|
+
Hpricot adds end tags when it sees an unclosed tag. This means that
|
19
|
+
certain tests that rely on feedparser.py's _HTMLSanitizer not closing
|
20
|
+
tags will fail. Many of the tests affected (actually, all the ones
|
21
|
+
affected, AFAICT) would otherwise have passed.
|
22
|
+
|
23
|
+
Tests Affected:
|
24
|
+
* tests/wellformed/rss/item_description_not_a_doctype.xml (extraneous trailing </a>)
|
25
|
+
* tests/illformed/rss/item_description_not_a_doctype.xml (ditto)
|
26
|
+
==
|
27
|
+
Problem:
|
28
|
+
The Hpricot#scrub method I've written does not remove the dangerous
|
29
|
+
markup in the same way feedparser.py does, but the output is still safe.
|
30
|
+
|
31
|
+
Tests Affected:
|
32
|
+
* tests/wellformed/sanitize/entry_content_crazy.xml
|
33
|
+
* tests/wellformed/sanitize/entry_summary_crazy.xml
|
34
|
+
* tests/wellformed/sanitize/entry_title_crazy.xml
|
35
|
+
* tests/wellformed/sanitize/feed_copyright_crazy.xml
|
36
|
+
* tests/wellformed/sanitize/feed_info_crazy.xml
|
37
|
+
* tests/wellformed/sanitize/feed_subtitle_crazy.xml
|
38
|
+
* tests/wellformed/sanitize/feed_tagline_crazy.xml
|
39
|
+
* tests/wellformed/sanitize/feed_title_crazy.xml
|
40
|
+
* tests/wellformed/sanitize/item_content_encoded_crazy.xml
|
41
|
+
* tests/wellformed/sanitize/item_description_crazy.xml
|
42
|
+
* tests/wellformed/sanitize/item_fullitem_crazy.xml
|
43
|
+
* tests/illformed/sanitize/entry_content_crazy.xml
|
44
|
+
* tests/illformed/sanitize/entry_summary_crazy.xml
|
45
|
+
* tests/illformed/sanitize/entry_title_crazy.xml
|
46
|
+
* tests/illformed/sanitize/feed_copyright_crazy.xml
|
47
|
+
* tests/illformed/sanitize/feed_info_crazy.xml
|
48
|
+
* tests/illformed/sanitize/feed_subtitle_crazy.xml
|
49
|
+
* tests/illformed/sanitize/feed_tagline_crazy.xml
|
50
|
+
* tests/illformed/sanitize/feed_title_crazy.xml
|
51
|
+
* tests/illformed/sanitize/item_content_encoded_crazy.xml
|
52
|
+
* tests/illformed/sanitize/item_description_crazy.xml
|
53
|
+
* tests/illformed/sanitize/item_fullitem_crazy.xml
|
54
|
+
==
|
55
|
+
|
56
|
+
Problem:
|
57
|
+
My current system lacks a few encodings that rfeedparser and Iconv need.
|
58
|
+
This results in failures that will probably not occur on other machines.
|
59
|
+
|
60
|
+
Tests Affected:
|
data/lib/feedparser.rb
ADDED
@@ -0,0 +1,3671 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
"""Universal feed parser in Ruby
|
3
|
+
|
4
|
+
Handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds
|
5
|
+
|
6
|
+
Visit http://feedparser.org/ for the latest version in Python
|
7
|
+
Visit http://feedparser.org/docs/ for the latest documentation
|
8
|
+
Email Jeff Hodges at jeff@obquo.com for questions
|
9
|
+
|
10
|
+
Required: Ruby 1.8
|
11
|
+
"""
|
12
|
+
$KCODE = 'UTF8'
|
13
|
+
require 'stringio'
|
14
|
+
require 'uri'
|
15
|
+
require 'cgi' # escaping html
|
16
|
+
require 'time'
|
17
|
+
require 'xml/saxdriver' # calling expat
|
18
|
+
require 'pp'
|
19
|
+
require 'rubygems'
|
20
|
+
require 'base64'
|
21
|
+
require 'iconv'
|
22
|
+
begin
|
23
|
+
gem 'hpricot', ">=0.5"
|
24
|
+
gem 'character-encodings', ">=0.2.0"
|
25
|
+
gem 'htmltools'
|
26
|
+
gem 'htmlentities'
|
27
|
+
gem 'activesupport'
|
28
|
+
gem 'rchardet'
|
29
|
+
rescue Gem::LoadError,LoadError
|
30
|
+
end
|
31
|
+
|
32
|
+
require 'chardet'
|
33
|
+
$chardet = true
|
34
|
+
|
35
|
+
require 'hpricot'
|
36
|
+
require 'encoding/character/utf-8'
|
37
|
+
require 'html/sgml-parser'
|
38
|
+
require 'htmlentities'
|
39
|
+
require 'active_support'
|
40
|
+
require 'open-uri'
|
41
|
+
include OpenURI
|
42
|
+
|
43
|
+
$debug = false
|
44
|
+
$compatible = true
|
45
|
+
|
46
|
+
Encoding_Aliases = { # Adapted from python2.4's encodings/aliases.py
|
47
|
+
# ascii codec
|
48
|
+
'646' => 'ascii',
|
49
|
+
'ansi_x3.4_1968' => 'ascii',
|
50
|
+
'ansi_x3_4_1968' => 'ascii', # some email headers use this non-standard name
|
51
|
+
'ansi_x3.4_1986' => 'ascii',
|
52
|
+
'cp367' => 'ascii',
|
53
|
+
'csascii' => 'ascii',
|
54
|
+
'ibm367' => 'ascii',
|
55
|
+
'iso646_us' => 'ascii',
|
56
|
+
'iso_646.irv_1991' => 'ascii',
|
57
|
+
'iso_ir_6' => 'ascii',
|
58
|
+
'us' => 'ascii',
|
59
|
+
'us_ascii' => 'ascii',
|
60
|
+
|
61
|
+
# big5 codec
|
62
|
+
'big5_tw' => 'big5',
|
63
|
+
'csbig5' => 'big5',
|
64
|
+
|
65
|
+
# big5hkscs codec
|
66
|
+
'big5_hkscs' => 'big5hkscs',
|
67
|
+
'hkscs' => 'big5hkscs',
|
68
|
+
|
69
|
+
# cp037 codec
|
70
|
+
'037' => 'cp037',
|
71
|
+
'csibm037' => 'cp037',
|
72
|
+
'ebcdic_cp_ca' => 'cp037',
|
73
|
+
'ebcdic_cp_nl' => 'cp037',
|
74
|
+
'ebcdic_cp_us' => 'cp037',
|
75
|
+
'ebcdic_cp_wt' => 'cp037',
|
76
|
+
'ibm037' => 'cp037',
|
77
|
+
'ibm039' => 'cp037',
|
78
|
+
|
79
|
+
# cp1026 codec
|
80
|
+
'1026' => 'cp1026',
|
81
|
+
'csibm1026' => 'cp1026',
|
82
|
+
'ibm1026' => 'cp1026',
|
83
|
+
|
84
|
+
# cp1140 codec
|
85
|
+
'1140' => 'cp1140',
|
86
|
+
'ibm1140' => 'cp1140',
|
87
|
+
|
88
|
+
# cp1250 codec
|
89
|
+
'1250' => 'cp1250',
|
90
|
+
'windows_1250' => 'cp1250',
|
91
|
+
|
92
|
+
# cp1251 codec
|
93
|
+
'1251' => 'cp1251',
|
94
|
+
'windows_1251' => 'cp1251',
|
95
|
+
|
96
|
+
# cp1252 codec
|
97
|
+
'1252' => 'cp1252',
|
98
|
+
'windows_1252' => 'cp1252',
|
99
|
+
|
100
|
+
# cp1253 codec
|
101
|
+
'1253' => 'cp1253',
|
102
|
+
'windows_1253' => 'cp1253',
|
103
|
+
|
104
|
+
# cp1254 codec
|
105
|
+
'1254' => 'cp1254',
|
106
|
+
'windows_1254' => 'cp1254',
|
107
|
+
|
108
|
+
# cp1255 codec
|
109
|
+
'1255' => 'cp1255',
|
110
|
+
'windows_1255' => 'cp1255',
|
111
|
+
|
112
|
+
# cp1256 codec
|
113
|
+
'1256' => 'cp1256',
|
114
|
+
'windows_1256' => 'cp1256',
|
115
|
+
|
116
|
+
# cp1257 codec
|
117
|
+
'1257' => 'cp1257',
|
118
|
+
'windows_1257' => 'cp1257',
|
119
|
+
|
120
|
+
# cp1258 codec
|
121
|
+
'1258' => 'cp1258',
|
122
|
+
'windows_1258' => 'cp1258',
|
123
|
+
|
124
|
+
# cp424 codec
|
125
|
+
'424' => 'cp424',
|
126
|
+
'csibm424' => 'cp424',
|
127
|
+
'ebcdic_cp_he' => 'cp424',
|
128
|
+
'ibm424' => 'cp424',
|
129
|
+
|
130
|
+
# cp437 codec
|
131
|
+
'437' => 'cp437',
|
132
|
+
'cspc8codepage437' => 'cp437',
|
133
|
+
'ibm437' => 'cp437',
|
134
|
+
|
135
|
+
# cp500 codec
|
136
|
+
'500' => 'cp500',
|
137
|
+
'csibm500' => 'cp500',
|
138
|
+
'ebcdic_cp_be' => 'cp500',
|
139
|
+
'ebcdic_cp_ch' => 'cp500',
|
140
|
+
'ibm500' => 'cp500',
|
141
|
+
|
142
|
+
# cp775 codec
|
143
|
+
'775' => 'cp775',
|
144
|
+
'cspc775baltic' => 'cp775',
|
145
|
+
'ibm775' => 'cp775',
|
146
|
+
|
147
|
+
# cp850 codec
|
148
|
+
'850' => 'cp850',
|
149
|
+
'cspc850multilingual' => 'cp850',
|
150
|
+
'ibm850' => 'cp850',
|
151
|
+
|
152
|
+
# cp852 codec
|
153
|
+
'852' => 'cp852',
|
154
|
+
'cspcp852' => 'cp852',
|
155
|
+
'ibm852' => 'cp852',
|
156
|
+
|
157
|
+
# cp855 codec
|
158
|
+
'855' => 'cp855',
|
159
|
+
'csibm855' => 'cp855',
|
160
|
+
'ibm855' => 'cp855',
|
161
|
+
|
162
|
+
# cp857 codec
|
163
|
+
'857' => 'cp857',
|
164
|
+
'csibm857' => 'cp857',
|
165
|
+
'ibm857' => 'cp857',
|
166
|
+
|
167
|
+
# cp860 codec
|
168
|
+
'860' => 'cp860',
|
169
|
+
'csibm860' => 'cp860',
|
170
|
+
'ibm860' => 'cp860',
|
171
|
+
|
172
|
+
# cp861 codec
|
173
|
+
'861' => 'cp861',
|
174
|
+
'cp_is' => 'cp861',
|
175
|
+
'csibm861' => 'cp861',
|
176
|
+
'ibm861' => 'cp861',
|
177
|
+
|
178
|
+
# cp862 codec
|
179
|
+
'862' => 'cp862',
|
180
|
+
'cspc862latinhebrew' => 'cp862',
|
181
|
+
'ibm862' => 'cp862',
|
182
|
+
|
183
|
+
# cp863 codec
|
184
|
+
'863' => 'cp863',
|
185
|
+
'csibm863' => 'cp863',
|
186
|
+
'ibm863' => 'cp863',
|
187
|
+
|
188
|
+
# cp864 codec
|
189
|
+
'864' => 'cp864',
|
190
|
+
'csibm864' => 'cp864',
|
191
|
+
'ibm864' => 'cp864',
|
192
|
+
|
193
|
+
# cp865 codec
|
194
|
+
'865' => 'cp865',
|
195
|
+
'csibm865' => 'cp865',
|
196
|
+
'ibm865' => 'cp865',
|
197
|
+
|
198
|
+
# cp866 codec
|
199
|
+
'866' => 'cp866',
|
200
|
+
'csibm866' => 'cp866',
|
201
|
+
'ibm866' => 'cp866',
|
202
|
+
|
203
|
+
# cp869 codec
|
204
|
+
'869' => 'cp869',
|
205
|
+
'cp_gr' => 'cp869',
|
206
|
+
'csibm869' => 'cp869',
|
207
|
+
'ibm869' => 'cp869',
|
208
|
+
|
209
|
+
# cp932 codec
|
210
|
+
'932' => 'cp932',
|
211
|
+
'ms932' => 'cp932',
|
212
|
+
'mskanji' => 'cp932',
|
213
|
+
'ms_kanji' => 'cp932',
|
214
|
+
|
215
|
+
# cp949 codec
|
216
|
+
'949' => 'cp949',
|
217
|
+
'ms949' => 'cp949',
|
218
|
+
'uhc' => 'cp949',
|
219
|
+
|
220
|
+
# cp950 codec
|
221
|
+
'950' => 'cp950',
|
222
|
+
'ms950' => 'cp950',
|
223
|
+
|
224
|
+
# euc_jp codec
|
225
|
+
'euc_jp' => 'euc-jp',
|
226
|
+
'eucjp' => 'euc-jp',
|
227
|
+
'ujis' => 'euc-jp',
|
228
|
+
'u_jis' => 'euc-jp',
|
229
|
+
|
230
|
+
# euc_kr codec
|
231
|
+
'euc_kr' => 'euc-kr',
|
232
|
+
'euckr' => 'euc-kr',
|
233
|
+
'korean' => 'euc-kr',
|
234
|
+
'ksc5601' => 'euc-kr',
|
235
|
+
'ks_c_5601' => 'euc-kr',
|
236
|
+
'ks_c_5601_1987' => 'euc-kr',
|
237
|
+
'ksx1001' => 'euc-kr',
|
238
|
+
'ks_x_1001' => 'euc-kr',
|
239
|
+
|
240
|
+
# gb18030 codec
|
241
|
+
'gb18030_2000' => 'gb18030',
|
242
|
+
|
243
|
+
# gb2312 codec
|
244
|
+
'chinese' => 'gb2312',
|
245
|
+
'csiso58gb231280' => 'gb2312',
|
246
|
+
'euc_cn' => 'gb2312',
|
247
|
+
'euccn' => 'gb2312',
|
248
|
+
'eucgb2312_cn' => 'gb2312',
|
249
|
+
'gb2312_1980' => 'gb2312',
|
250
|
+
'gb2312_80' => 'gb2312',
|
251
|
+
'iso_ir_58' => 'gb2312',
|
252
|
+
|
253
|
+
# gbk codec
|
254
|
+
'936' => 'gbk',
|
255
|
+
'cp936' => 'gbk',
|
256
|
+
'ms936' => 'gbk',
|
257
|
+
|
258
|
+
# hp-roman8 codec
|
259
|
+
'hp_roman8' => 'hp-roman8',
|
260
|
+
'roman8' => 'hp-roman8',
|
261
|
+
'r8' => 'hp-roman8',
|
262
|
+
'csHPRoman8' => 'hp-roman8',
|
263
|
+
|
264
|
+
# iso2022_jp codec
|
265
|
+
'iso2022_jp' => 'iso-2022-jp',
|
266
|
+
'csiso2022jp' => 'iso-2022-jp',
|
267
|
+
'iso2022jp' => 'iso-2022-jp',
|
268
|
+
'iso_2022_jp' => 'iso-2022-jp',
|
269
|
+
|
270
|
+
# iso2022_jp_1 codec
|
271
|
+
'iso2002_jp_1' => 'iso-2022-jp-1',
|
272
|
+
'iso2022jp_1' => 'iso-2022-jp-1',
|
273
|
+
'iso_2022_jp_1' => 'iso-2022-jp-1',
|
274
|
+
|
275
|
+
# iso2022_jp_2 codec
|
276
|
+
'iso2022_jp_2' => 'iso-2002-jp-2',
|
277
|
+
'iso2022jp_2' => 'iso-2022-jp-2',
|
278
|
+
'iso_2022_jp_2' => 'iso-2022-jp-2',
|
279
|
+
|
280
|
+
# iso2022_jp_3 codec
|
281
|
+
'iso2002_jp_3' => 'iso-2022-jp-3',
|
282
|
+
'iso2022jp_3' => 'iso-2022-jp-3',
|
283
|
+
'iso_2022_jp_3' => 'iso-2022-jp-3',
|
284
|
+
|
285
|
+
# iso2022_kr codec
|
286
|
+
'iso2022_kr' => 'iso-2022-kr',
|
287
|
+
'csiso2022kr' => 'iso-2022-kr',
|
288
|
+
'iso2022kr' => 'iso-2022-kr',
|
289
|
+
'iso_2022_kr' => 'iso-2022-kr',
|
290
|
+
|
291
|
+
# iso8859_10 codec
|
292
|
+
'iso8859_10' => 'iso-8859-10',
|
293
|
+
'csisolatin6' => 'iso-8859-10',
|
294
|
+
'iso_8859_10' => 'iso-8859-10',
|
295
|
+
'iso_8859_10_1992' => 'iso-8859-10',
|
296
|
+
'iso_ir_157' => 'iso-8859-10',
|
297
|
+
'l6' => 'iso-8859-10',
|
298
|
+
'latin6' => 'iso-8859-10',
|
299
|
+
|
300
|
+
# iso8859_13 codec
|
301
|
+
'iso8859_13' => 'iso-8859-13',
|
302
|
+
'iso_8859_13' => 'iso-8859-13',
|
303
|
+
|
304
|
+
# iso8859_14 codec
|
305
|
+
'iso8859_14' => 'iso-8859-14',
|
306
|
+
'iso_8859_14' => 'iso-8859-14',
|
307
|
+
'iso_8859_14_1998' => 'iso-8859-14',
|
308
|
+
'iso_celtic' => 'iso-8859-14',
|
309
|
+
'iso_ir_199' => 'iso-8859-14',
|
310
|
+
'l8' => 'iso-8859-14',
|
311
|
+
'latin8' => 'iso-8859-14',
|
312
|
+
|
313
|
+
# iso8859_15 codec
|
314
|
+
'iso8859_15' => 'iso-8859-15',
|
315
|
+
'iso_8859_15' => 'iso-8859-15',
|
316
|
+
|
317
|
+
# iso8859_1 codec
|
318
|
+
'latin_1' => 'iso-8859-1',
|
319
|
+
'cp819' => 'iso-8859-1',
|
320
|
+
'csisolatin1' => 'iso-8859-1',
|
321
|
+
'ibm819' => 'iso-8859-1',
|
322
|
+
'iso8859' => 'iso-8859-1',
|
323
|
+
'iso_8859_1' => 'iso-8859-1',
|
324
|
+
'iso_8859_1_1987' => 'iso-8859-1',
|
325
|
+
'iso_ir_100' => 'iso-8859-1',
|
326
|
+
'l1' => 'iso-8859-1',
|
327
|
+
'latin' => 'iso-8859-1',
|
328
|
+
'latin1' => 'iso-8859-1',
|
329
|
+
|
330
|
+
# iso8859_2 codec
|
331
|
+
'iso8859_2' => 'iso-8859-2',
|
332
|
+
'csisolatin2' => 'iso-8859-2',
|
333
|
+
'iso_8859_2' => 'iso-8859-2',
|
334
|
+
'iso_8859_2_1987' => 'iso-8859-2',
|
335
|
+
'iso_ir_101' => 'iso-8859-2',
|
336
|
+
'l2' => 'iso-8859-2',
|
337
|
+
'latin2' => 'iso-8859-2',
|
338
|
+
|
339
|
+
# iso8859_3 codec
|
340
|
+
'iso8859_3' => 'iso-8859-3',
|
341
|
+
'csisolatin3' => 'iso-8859-3',
|
342
|
+
'iso_8859_3' => 'iso-8859-3',
|
343
|
+
'iso_8859_3_1988' => 'iso-8859-3',
|
344
|
+
'iso_ir_109' => 'iso-8859-3',
|
345
|
+
'l3' => 'iso-8859-3',
|
346
|
+
'latin3' => 'iso-8859-3',
|
347
|
+
|
348
|
+
# iso8859_4 codec
|
349
|
+
'iso8849_4' => 'iso-8859-4',
|
350
|
+
'csisolatin4' => 'iso-8859-4',
|
351
|
+
'iso_8859_4' => 'iso-8859-4',
|
352
|
+
'iso_8859_4_1988' => 'iso-8859-4',
|
353
|
+
'iso_ir_110' => 'iso-8859-4',
|
354
|
+
'l4' => 'iso-8859-4',
|
355
|
+
'latin4' => 'iso-8859-4',
|
356
|
+
|
357
|
+
# iso8859_5 codec
|
358
|
+
'iso8859_5' => 'iso-8859-5',
|
359
|
+
'csisolatincyrillic' => 'iso-8859-5',
|
360
|
+
'cyrillic' => 'iso-8859-5',
|
361
|
+
'iso_8859_5' => 'iso-8859-5',
|
362
|
+
'iso_8859_5_1988' => 'iso-8859-5',
|
363
|
+
'iso_ir_144' => 'iso-8859-5',
|
364
|
+
|
365
|
+
# iso8859_6 codec
|
366
|
+
'iso8859_6' => 'iso-8859-6',
|
367
|
+
'arabic' => 'iso-8859-6',
|
368
|
+
'asmo_708' => 'iso-8859-6',
|
369
|
+
'csisolatinarabic' => 'iso-8859-6',
|
370
|
+
'ecma_114' => 'iso-8859-6',
|
371
|
+
'iso_8859_6' => 'iso-8859-6',
|
372
|
+
'iso_8859_6_1987' => 'iso-8859-6',
|
373
|
+
'iso_ir_127' => 'iso-8859-6',
|
374
|
+
|
375
|
+
# iso8859_7 codec
|
376
|
+
'iso8859_7' => 'iso-8859-7',
|
377
|
+
'csisolatingreek' => 'iso-8859-7',
|
378
|
+
'ecma_118' => 'iso-8859-7',
|
379
|
+
'elot_928' => 'iso-8859-7',
|
380
|
+
'greek' => 'iso-8859-7',
|
381
|
+
'greek8' => 'iso-8859-7',
|
382
|
+
'iso_8859_7' => 'iso-8859-7',
|
383
|
+
'iso_8859_7_1987' => 'iso-8859-7',
|
384
|
+
'iso_ir_126' => 'iso-8859-7',
|
385
|
+
|
386
|
+
# iso8859_8 codec
|
387
|
+
'iso8859_9' => 'iso8859_8',
|
388
|
+
'csisolatinhebrew' => 'iso-8859-8',
|
389
|
+
'hebrew' => 'iso-8859-8',
|
390
|
+
'iso_8859_8' => 'iso-8859-8',
|
391
|
+
'iso_8859_8_1988' => 'iso-8859-8',
|
392
|
+
'iso_ir_138' => 'iso-8859-8',
|
393
|
+
|
394
|
+
# iso8859_9 codec
|
395
|
+
'iso8859_9' => 'iso-8859-9',
|
396
|
+
'csisolatin5' => 'iso-8859-9',
|
397
|
+
'iso_8859_9' => 'iso-8859-9',
|
398
|
+
'iso_8859_9_1989' => 'iso-8859-9',
|
399
|
+
'iso_ir_148' => 'iso-8859-9',
|
400
|
+
'l5' => 'iso-8859-9',
|
401
|
+
'latin5' => 'iso-8859-9',
|
402
|
+
|
403
|
+
# iso8859_11 codec
|
404
|
+
'iso8859_11' => 'iso-8859-11',
|
405
|
+
'thai' => 'iso-8859-11',
|
406
|
+
'iso_8859_11' => 'iso-8859-11',
|
407
|
+
'iso_8859_11_2001' => 'iso-8859-11',
|
408
|
+
|
409
|
+
# iso8859_16 codec
|
410
|
+
'iso8859_16' => 'iso-8859-16',
|
411
|
+
'iso_8859_16' => 'iso-8859-16',
|
412
|
+
'iso_8859_16_2001' => 'iso-8859-16',
|
413
|
+
'iso_ir_226' => 'iso-8859-16',
|
414
|
+
'l10' => 'iso-8859-16',
|
415
|
+
'latin10' => 'iso-8859-16',
|
416
|
+
|
417
|
+
# cskoi8r codec
|
418
|
+
'koi8_r' => 'cskoi8r',
|
419
|
+
|
420
|
+
# mac_cyrillic codec
|
421
|
+
'mac_cyrillic' => 'maccyrillic',
|
422
|
+
|
423
|
+
# shift_jis codec
|
424
|
+
'csshiftjis' => 'shift_jis',
|
425
|
+
'shiftjis' => 'shift_jis',
|
426
|
+
'sjis' => 'shift_jis',
|
427
|
+
's_jis' => 'shift_jis',
|
428
|
+
|
429
|
+
# shift_jisx0213 codec
|
430
|
+
'shiftjisx0213' => 'shift_jisx0213',
|
431
|
+
'sjisx0213' => 'shift_jisx0213',
|
432
|
+
's_jisx0213' => 'shift_jisx0213',
|
433
|
+
|
434
|
+
# utf_16 codec
|
435
|
+
'utf_16' => 'utf-16',
|
436
|
+
'u16' => 'utf-16',
|
437
|
+
'utf16' => 'utf-16',
|
438
|
+
|
439
|
+
# utf_16_be codec
|
440
|
+
'utf_16_be' => 'utf-16be',
|
441
|
+
'unicodebigunmarked' => 'utf-16be',
|
442
|
+
'utf_16be' => 'utf-16be',
|
443
|
+
|
444
|
+
# utf_16_le codec
|
445
|
+
'utf_16_le' => 'utf-16le',
|
446
|
+
'unicodelittleunmarked' => 'utf-16le',
|
447
|
+
'utf_16le' => 'utf-16le',
|
448
|
+
|
449
|
+
# utf_7 codec
|
450
|
+
'utf_7' => 'utf-7',
|
451
|
+
'u7' => 'utf-7',
|
452
|
+
'utf7' => 'utf-7',
|
453
|
+
|
454
|
+
# utf_8 codec
|
455
|
+
'utf_8' => 'utf-8',
|
456
|
+
'u8' => 'utf-8',
|
457
|
+
'utf' => 'utf-8',
|
458
|
+
'utf8' => 'utf-8',
|
459
|
+
'utf8_ucs2' => 'utf-8',
|
460
|
+
'utf8_ucs4' => 'utf-8',
|
461
|
+
}
|
462
|
+
|
463
|
+
# Converts a single string from +from_encoding+ to the 'unicode' target
# encoding by delegating to uconvert.
def unicode(data, from_encoding)
  target_encoding = 'unicode'
  uconvert(data, from_encoding, target_encoding)
end
|
468
|
+
|
469
|
+
# Converts +data+ from +from_encoding+ to +to_encoding+ (default 'utf-8').
# Both encoding names are first looked up in Encoding_Aliases; a name with no
# alias entry is passed to Iconv unchanged.
def uconvert(data, from_encoding, to_encoding = 'utf-8')
  source, target = [from_encoding, to_encoding].map do |name|
    Encoding_Aliases[name] || name
  end
  Iconv.iconv(target, source, data).first
end
|
474
|
+
|
475
|
+
# Returns the UTF-8 encoded string for the single Unicode code point +i+.
def unichr(i)
  codepoints = [i]
  codepoints.pack('U')
end
|
478
|
+
|
479
|
+
# Finds the first match of +regexp+ in +stri+ at or after +offset+.
#
# Returns a two-element result: the index at which the match starts and the
# MatchData obtained by matching +regexp+ against the substring that begins at
# that index. Returns [nil, nil] when there is no match.
def index_match(stri, regexp, offset)
  i = stri.index(regexp, offset)
  return nil, nil unless i

  # Re-match on the tail so callers get the capture groups for this hit.
  full = stri[i..-1].match(regexp)
  return i, full
end
|
489
|
+
|
490
|
+
# Re-encodes +s+ with Iconv from "ebcdic-cp-be" to "iso88591" and returns the
# converted string.
def _ebcdic_to_ascii(s)
  converted = Iconv.iconv("iso88591", "ebcdic-cp-be", s)
  converted.first
end
|
493
|
+
|
494
|
+
# Resolves +uri+ against +base+ and returns the result as a String.
#
# Surplus slashes directly after the "scheme://" prefix of +uri+ are removed
# before joining (e.g. "http:////host/x" becomes "http://host/x").
#
# If URI.join raises URI::BadURIError and +base+ is a relative reference, +uri+
# is parsed on its own and returned. If +base+ is not relative in that
# situation the method returns nil. Other URI errors (such as
# URI::InvalidURIError) propagate to the caller.
def urljoin(base, uri)
  # The hyphen is escaped so the class means "+", "-" and "." literally rather
  # than the character range "+".."." (which also admitted ",").
  urifixer = /^([A-Za-z][A-Za-z0-9+\-.]*:\/\/)(\/*)(.*?)/u
  uri = uri.sub(urifixer, '\1\3')
  begin
    URI.join(base, uri).to_s
  rescue URI::BadURIError
    URI.parse(uri).to_s if URI.parse(base).relative?
  end
end
|
505
|
+
|
506
|
+
# Builds a UTC Time from the first six elements of +pytuple+
# (year, month, day, hour, minute, second); any further elements are ignored.
def py2rtime(pytuple)
  # Time.utc takes the components as separate arguments, so the slice has to
  # be splatted; passing the Array itself raises a TypeError.
  Time.utc(*pytuple[0..5])
end
|
509
|
+
|
510
|
+
# http://intertwingly.net/stories/2005/09/28/xchar.rb
|
511
|
+
# Lookup tables used by the xchr method (XML-escaped character output).
module XChar
  # Maps Windows-1252 byte values in the 128..159 range to the Unicode code
  # points they stand for.
  # http://intertwingly.net/stories/2004/04/14/i18n.html#CleaningWindows
  CP1252 = {
    128 => 8364, # euro sign
    130 => 8218, # single low-9 quotation mark
    131 => 402,  # latin small letter f with hook
    132 => 8222, # double low-9 quotation mark
    133 => 8230, # horizontal ellipsis
    134 => 8224, # dagger
    135 => 8225, # double dagger
    136 => 710,  # modifier letter circumflex accent
    137 => 8240, # per mille sign
    138 => 352,  # latin capital letter s with caron
    139 => 8249, # single left-pointing angle quotation mark
    140 => 338,  # latin capital ligature oe
    142 => 381,  # latin capital letter z with caron
    145 => 8216, # left single quotation mark
    146 => 8217, # right single quotation mark
    147 => 8220, # left double quotation mark
    148 => 8221, # right double quotation mark
    149 => 8226, # bullet
    150 => 8211, # en dash
    151 => 8212, # em dash
    152 => 732,  # small tilde
    153 => 8482, # trade mark sign
    154 => 353,  # latin small letter s with caron
    155 => 8250, # single right-pointing angle quotation mark
    156 => 339,  # latin small ligature oe
    158 => 382,  # latin small letter z with caron
    159 => 376}  # latin capital letter y with diaeresis

  # Code points that must be written as entity references in XML character
  # data. The values are the escaped forms; mapping a character to itself
  # would leave the markup characters unescaped.
  # http://www.w3.org/TR/REC-xml/#dt-chardata
  PREDEFINED = {
    38 => '&amp;', # ampersand
    60 => '&lt;',  # left angle bracket
    62 => '&gt;'}  # right angle bracket

  # Code points that are legal in an XML document.
  # http://www.w3.org/TR/REC-xml/#charsets
  VALID = [[0x9, 0xA, 0xD], (0x20..0xD7FF),
    (0xE000..0xFFFD), (0x10000..0x10FFFF)]
end
|
552
|
+
|
553
|
+
# Integer is reopened (rather than Fixnum) so the method exists on every Ruby
# version: Fixnum inherits from Integer on old interpreters and no longer
# exists as a separate class on current ones.
class Integer
  # XML-escaped version of chr.
  #
  # The receiver is treated as a code point. Windows-1252 values listed in
  # XChar::CP1252 are translated first; anything outside XChar::VALID is
  # replaced by 42 ("*"). The result is the XChar::PREDEFINED replacement if
  # one exists, the plain character for values below 128, and a numeric
  # character reference ("&#NNN;") otherwise.
  def xchr
    n = XChar::CP1252[self] || self
    n = 42 unless XChar::VALID.any? { |range| range.include?(n) }
    XChar::PREDEFINED[n] || (n < 128 ? n.chr : "&##{n};")
  end
end
|
561
|
+
|
562
|
+
class String
  # Keeps the stock String#index reachable under the name old_index.
  alias old_index index

  # Returns the string with every character passed through xchr and the
  # results concatenated. The bytes are first decoded as UTF-8; if that raises,
  # each byte is treated as a single character instead.
  def to_xs
    unpack('U*').map(&:xchr).join # ASCII, UTF-8
  rescue
    unpack('C*').map(&:xchr).join # ISO-8859-1, WIN-1252
  end
end
|
570
|
+
|
571
|
+
# Raised by BetterSGMLParser#error when the parser reaches an unexpected state.
# It derives from Exception rather than StandardError, so a bare `rescue`
# clause does not catch it.
class BetterSGMLParserError < Exception; end;
|
572
|
+
class BetterSGMLParser < HTML::SGMLParser
|
573
|
+
# Replaced Tagfind and Charref Regexps with the ones in feedparser.py
# This makes things work.

# Characters at which goahead stops copying plain data.
Interesting = /[&<]/u
# A reference or tag that has started but may not be complete yet.
Incomplete = Regexp.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' +
  '<([a-zA-Z][^<>]*|/([a-zA-Z][^<>]*)?|' +
  '![^<>]*)?', 64) # 64 is the unicode flag

# Both reference patterns consume the one character that follows the
# reference; goahead steps back over it unless it is ";".
Entityref = /&([a-zA-Z][-.a-zA-Z0-9]*)[^-.a-zA-Z0-9]/u
Charref = /&#(x?[0-9A-Fa-f]+)[^0-9A-Fa-f]/u

# NOTE(review): both short-tag patterns begin with a literal apostrophe, so
# they only match text of the form '<tag... -- confirm whether this is intended.
Shorttagopen = /'<[a-zA-Z][-.a-zA-Z0-9]*/u
Shorttag = /'<([a-zA-Z][-.a-zA-Z0-9]*)\/([^\/]*)\//u
Endtagopen = /<\//u # Matching the Python SGMLParser
Endbracket = /[<>]/u
Declopen = /<!/u
# "^" anchors at the start of a line, so this only matches "<?" there.
Piopenbegin = /^<\?/u
Piclose = />/u

Commentopen = /<!--/u
Commentclose = /--\s*>/u
Tagfind = /[a-zA-Z][-_.:a-zA-Z0-9]*/u
# Groups: 1 = attribute name, 2 = the whole "= value" part (optional),
# 3 = the value, still wrapped in its quotes when it was quoted.
Attrfind = Regexp.compile('\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'+
  '(\'[^\']*\'|"[^"]*"|[\]\[\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?',
  64)
# Tail of a self-closing tag: optional whitespace, "/", optional whitespace, ">".
Endtagfind = /\s*\/\s*>/u
|
598
|
+
# Creates the parser; +verbose+ is handed straight to the superclass.
def initialize(verbose=false)
  super
end
|
601
|
+
# Passes all arguments through to the superclass implementation of feed.
def feed(*args)
  super
end
|
604
|
+
|
605
|
+
# Main scanning loop: walks @rawdata, emitting plain text through handle_data
# and dispatching markup to the parse_* / handle_* methods.
#
# _end:: when true, whatever is left unprocessed at the end of the buffer is
#        flushed as data; when false it is kept in @rawdata for a later call.
#
# On return @rawdata holds only the unconsumed tail of the buffer.
def goahead(_end)
  rawdata = @rawdata # woo, utf-8 magic
  i = 0
  n = rawdata.length
  while i < n
    if @nomoretags
      # Everything that remains is plain data.
      handle_data(rawdata[i...n])
      i = n
      break
    end

    # Copy plain text up to the next "&" or "<".
    j = rawdata.index(Interesting, i)
    j = n unless j
    handle_data(rawdata[i...j]) if i < j
    i = j
    break if i == n

    if rawdata[i..i] == '<'
      if rawdata.index(Starttagopen, i) == i
        if @literal
          handle_data(rawdata[i..i])
          i += 1
          next
        end
        k = parse_starttag(i)
        break unless k
        i = k
        next
      end
      if rawdata.index(Endtagopen, i) == i
        k = parse_endtag(i)
        break unless k
        i = k
        @literal = false
        next
      end
      if @literal
        if n > (i + 1)
          handle_data("<")
          i += 1
        else
          # incomplete
          break
        end
        next
      end
      if rawdata.index(Commentopen, i) == i
        k = parse_comment(i)
        break unless k
        i = k
        next
      end
      if rawdata.index(Piopenbegin, i) == i
        # parse_pi returns a length, not an absolute position.
        k = parse_pi(i)
        break unless k
        i += k
        next
      end
      if rawdata.index(Declopen, i) == i
        # This is some sort of declaration; in "HTML as
        # deployed," this should only be the document type
        # declaration ("<!DOCTYPE html...>").
        k = parse_declaration(i)
        break unless k
        i = k
        next
      end
    elsif rawdata[i..i] == '&'
      if @literal
        handle_data(rawdata[i..i])
        i += 1
        next
      end

      ni, match = index_match(rawdata, Charref, i)
      if ni == i
        handle_charref(match[1])
        i += match[0].length
        # The pattern swallows one character after the reference; give it
        # back unless it was the terminating ";".
        i -= 1 unless rawdata[i-1..i-1] == ";"
        next
      end
      ni, match = index_match(rawdata, Entityref, i)
      if ni == i
        handle_entityref(match[1])
        i += match[0].length
        i -= 1 unless rawdata[i-1..i-1] == ";"
        next
      end
    else
      error('neither < nor & ??')
    end

    # We get here only if incomplete matches but nothing else.
    # index_match returns an absolute index, so the match has to be compared
    # with the current position i (comparing with 0 only ever succeeded at
    # the very start of the buffer).
    ni, match = index_match(rawdata, Incomplete, i)
    unless ni == i
      handle_data(rawdata[i...i+1])
      i += 1
      next
    end
    j = ni + match[0].length
    break if j == n # Really incomplete
    handle_data(rawdata[i...j])
    i = j
  end # end while

  if _end and i < n
    handle_data(rawdata[i...n])
    i = n
  end

  @rawdata = rawdata[i..-1]
end
|
718
|
+
|
719
|
+
|
720
|
+
# Internal -- parses the processing instruction that starts at index i.
# Hands the text between "<?" and the closing ">" to handle_pi and returns the
# number of characters consumed (counted from i), or nil when no closing ">"
# is found.
def parse_pi(i)
  rawdata = @rawdata
  error("unexpected call to parse_pi()") if rawdata[i...i+2] != '<?'

  close_at, match = index_match(rawdata, Piclose, i + 2)
  return nil unless match

  handle_pi(rawdata[i+2...close_at])
  close_at + match[0].length - i
end
|
733
|
+
|
734
|
+
# Internal -- parses the comment that starts at index i. Passes the text
# between "<!--" and the closing "-->" to handle_comment and returns the index
# just past the closing delimiter, or nil when the comment is not terminated.
def parse_comment(i)
  rawdata = @rawdata
  error("unexpected call to parse_comment()") unless rawdata[i...i+4] == "<!--"

  close_at, match = index_match(rawdata, Commentclose, i)
  return nil unless match

  body = rawdata[i+4...close_at]
  handle_comment(body)
  close_at + match[0].length
end
|
744
|
+
|
745
|
+
|
746
|
+
# Internal -- parses the start tag that begins at index i of @rawdata.
#
# Collects the tag name and its attributes as [name, value] pairs (names
# lower-cased, surrounding quotes removed from values), stores the raw tag
# text in @_starttag_text, calls finish_starttag and returns the index just
# past the tag. Returns nil when the tag is not complete yet.
def parse_starttag(i)
  @_starttag_text = nil
  start_pos = i
  rawdata = @rawdata
  ni, match = index_match(rawdata, Shorttagopen, i)
  if ni == i
    # SGML shorthand: <tag/data/ == <tag>data</tag>
    # XXX Can data contain &... (entity or char refs)?
    # XXX Can data contain < or > (tag characters)?
    # XXX Can there be whitespace before the first /?
    # NOTE(review): second_end may be nil here and k is the position where the
    # short tag starts, not where it ends -- left as found, confirm intent.
    k, match = index_match(rawdata, Shorttag, i)
    return nil unless match
    tag, data = match[1], match[2]
    @_starttag_text = "<#{tag}/"
    tag.downcase!
    second_end = rawdata.index(Shorttagopen, k)
    finish_shorttag(tag, data)
    @_starttag_text = rawdata[start_pos...second_end+1]
    return k
  end

  j = rawdata.index(Endbracket, i + 1)
  return nil unless j
  attrsd = []
  if rawdata[i...i+2] == '<>'
    # SGML shorthand: <> == <last open tag seen>
    k = j
    tag = @lasttag
  else
    ni, match = index_match(rawdata, Tagfind, i + 1)
    error('unexpected call to parse_starttag') unless match
    k = ni + match[0].length + 1
    tag = match[0].downcase
    @lasttag = tag
  end

  while k < j
    break if rawdata.index(Endtagfind, k) == k
    ni, match = index_match(rawdata, Attrfind, k)
    # The attribute must start exactly at k. An unanchored hit further along
    # may lie beyond the closing bracket and would turn ordinary text after
    # the tag into bogus attributes.
    break unless ni == k
    matched_length = match[0].length
    attrname, rest, attrvalue = match[1], match[2], match[3]
    if rest.nil? || rest.empty?
      attrvalue = '' # was: = attrname # Why the change?
    else
      # Compare one-character substrings so both quote styles are handled
      # the same way.
      opening, closing = attrvalue[0..0], attrvalue[-1..-1]
      if opening == closing && (opening == "'" || opening == '"')
        attrvalue = attrvalue[1...-1]
      end
    end
    attrsd << [attrname.downcase, attrvalue]
    k += matched_length
  end

  j += 1 if rawdata[j..j] == ">"
  @_starttag_text = rawdata[start_pos...j]
  finish_starttag(tag, attrsd)
  return j
end
|
805
|
+
|
806
|
+
# Internal -- parses the end tag that starts at index i. The tag name is the
# stripped, lower-cased text between "</" and the next "<" or ">"; it is passed
# to finish_endtag. Returns the index after the tag (past the ">" when there
# is one), or nil when no bracket follows.
def parse_endtag(i)
  rawdata = @rawdata
  bracket_at = rawdata.index(/[<>]/, i + 1)
  return nil unless bracket_at

  tag = rawdata[i+2...bracket_at].strip.downcase
  resume_at = rawdata[bracket_at..bracket_at] == ">" ? bracket_at + 1 : bracket_at
  finish_endtag(tag)
  resume_at
end
|
817
|
+
|
818
|
+
# Returns the collected @pieces as one string (each piece converted with to_s).
def output
  @pieces.map(&:to_s).join
end
|
822
|
+
|
823
|
+
# Aborts parsing by raising BetterSGMLParserError with the given message.
def error(message)
  raise BetterSGMLParserError, message
end
|
826
|
+
# Callback invoked by parse_pi with the text of a processing instruction.
# Deliberately empty: the text is discarded.
def handle_pi(text)
end
|
828
|
+
# Empty callback that ignores +text+.
# NOTE(review): presumably called for "<!...>" declarations by the inherited
# parse_declaration -- confirm against HTML::SGMLParser.
def handle_decl(text)
end
|
830
|
+
end
|
831
|
+
|
832
|
+
# Add some helper methods to make AttributeList (all of those damn attrs
|
833
|
+
# and attrsD used by StrictFeedParser) act more like a Hash.
|
834
|
+
# NOTE AttributeList is still Read-Only (AFAICT).
|
835
|
+
# Monkey patching is terrible, and I have an addiction.
|
836
|
+
module XML
  module SAX
    # Hash-like read helpers layered on the getLength / getName / getValue
    # accessors of an attribute list.
    module AttributeList # in xml/sax.rb
      # Value lookup, delegated to getValue.
      def [](key)
        getValue(key)
      end

      # Yields a [name, value] pair for every position in the list.
      def each(&blk)
        (0...getLength).each do |pos|
          yield [getName(pos), getValue(pos)]
        end
      end

      # Yields every attribute name in order.
      def each_key(&blk)
        (0...getLength).each do |pos|
          yield getName(pos)
        end
      end

      # Yields every attribute value in order.
      def each_value(&blk)
        (0...getLength).each do |pos|
          yield getValue(pos)
        end
      end

      # Array of [name, value] pairs.
      def to_a # Rather use collect? grep for to_a.collect
        pairs = []
        each { |name, value| pairs.push([name, value]) }
        pairs
      end

      # Renders the list as "{ name => value, ... }".
      def to_s
        entries = []
        each { |name, value| entries.push("#{name} => #{value}") }
        "{ " + entries.join(", ") + " }"
      end
    end
  end
end
|
869
|
+
# This adds a nice scrub method to Hpricot, so we don't need a _HTMLSanitizer class
|
870
|
+
# http://underpantsgnome.com/2007/01/20/hpricot-scrub
|
871
|
+
# I have modified it to check for attributes that are only allowed if they are in a certain tag
|
872
|
+
module Hpricot
|
873
|
+
# HTML element names on the acceptable list.
Acceptable_Elements = %w[
  a abbr acronym address area b
  big blockquote br button caption center cite
  code col colgroup dd del dfn dir div dl dt
  em fieldset font form h1 h2 h3 h4 h5 h6
  hr i img input ins kbd label legend li map
  menu ol optgroup option p pre q s samp
  select small span strike strong sub sup table
  tbody td textarea tfoot th thead tr tt u
  ul var
]

# HTML attribute names on the acceptable list.
Acceptable_Attributes = %w[
  abbr accept accept-charset accesskey
  action align alt axis border cellpadding
  cellspacing char charoff charset checked cite class
  clear cols colspan color compact coords datetime
  dir disabled enctype for frame headers height
  href hreflang hspace id ismap label lang
  longdesc maxlength media method multiple name
  nohref noshade nowrap prompt readonly rel rev
  rows rowspan rules scope selected shape size
  span src start summary tabindex target title
  type usemap valign value vspace width xml:lang
]

Unacceptable_Elements_With_End_Tag = %w[script applet]

# CSS property names on the acceptable list.
Acceptable_Css_Properties = %w[
  azimuth background-color
  border-bottom-color border-collapse border-color
  border-left-color border-right-color border-top-color clear
  color cursor direction display elevation float font
  font-family font-size font-style font-variant font-weight
  height letter-spacing line-height overflow pause
  pause-after pause-before pitch pitch-range richness
  speak speak-header speak-numeral speak-punctuation
  speech-rate stress text-align text-decoration text-indent
  unicode-bidi vertical-align voice-family volume
  white-space width
]

# survey of common keywords found in feeds
Acceptable_Css_Keywords = %w[
  auto aqua black block blue
  bold both bottom brown center collapse dashed
  dotted fuchsia gray green !important italic left
  lime maroon medium none navy normal nowrap olive
  pointer purple red right solid silver teal top
  transparent underline white yellow
]

Mathml_Elements = %w[
  maction math merror mfrac mi
  mmultiscripts mn mo mover mpadded mphantom
  mprescripts mroot mrow mspace msqrt mstyle msub
  msubsup msup mtable mtd mtext mtr munder
  munderover none
]

# The repeated entries are kept exactly as they appear in the original list.
Mathml_Attributes = %w[
  actiontype align columnalign columnalign
  columnalign columnlines columnspacing columnspan depth
  display displaystyle equalcolumns equalrows fence
  fontstyle fontweight frame height linethickness lspace
  mathbackground mathcolor mathvariant mathvariant maxsize
  minsize other rowalign rowalign rowalign rowlines
  rowspacing rowspan rspace scriptlevel selection
  separator stretchy width width xlink:href xlink:show
  xlink:type xmlns xmlns:xlink
]

# svgtiny - foreignObject + linearGradient + radialGradient + stop
Svg_Elements = %w[
  a animate animateColor animateMotion
  animateTransform circle defs desc ellipse font-face
  font-face-name font-face-src g glyph hkern image
  linearGradient line metadata missing-glyph mpath path
  polygon polyline radialGradient rect set stop svg
  switch text title use
]

# svgtiny + class + opacity + offset + xmlns + xmlns:xlink
Svg_Attributes = %w[
  accent-height accumulate additive alphabetic
  arabic-form ascent attributeName attributeType
  baseProfile bbox begin by calcMode cap-height
  class color color-rendering content cx cy d
  descent display dur end fill fill-rule font-family
  font-size font-stretch font-style font-variant
  font-weight from fx fy g1 g2 glyph-name
  gradientUnits hanging height horiz-adv-x horiz-origin-x
  id ideographic k keyPoints keySplines keyTimes
  lang mathematical max min name offset opacity
  origin overline-position overline-thickness panose-1
  path pathLength points preserveAspectRatio r
  repeatCount repeatDur requiredExtensions requiredFeatures
  restart rotate rx ry slope stemh stemv
  stop-color stop-opacity strikethrough-position
  strikethrough-thickness stroke stroke-dasharray
  stroke-dashoffset stroke-linecap stroke-linejoin
  stroke-miterlimit stroke-width systemLanguage target
  text-anchor to transform type u1 u2
  underline-position underline-thickness unicode
  unicode-range units-per-em values version viewBox
  visibility width widths x x-height x1 x2
  xlink:actuate xlink:arcrole xlink:href xlink:role
  xlink:show xlink:title xlink:type xml:base xml:lang
  xml:space xmlns xmlns:xlink y y1 y2 zoomAndPan
]

Svg_Attr_Map = nil
Svg_Elem_Map = nil

Acceptable_Svg_Properties = %w[
  fill fill-opacity fill-rule
  stroke stroke-width stroke-linecap stroke-linejoin
  stroke-opacity
]
|
983
|
+
|
984
|
+
# Outside of compatibility mode, build a per-element attribute whitelist:
# every MathML element maps to the MathML attribute list and every SVG element
# to the SVG attribute list. The lists are the constants defined above; no
# class variables of those names are ever assigned, so reading them raised
# NameError.
unless $compatible
  @@acceptable_tag_specific_attributes = {}
  Mathml_Elements.each { |e| @@acceptable_tag_specific_attributes[e] = Mathml_Attributes }
  Svg_Elements.each { |e| @@acceptable_tag_specific_attributes[e] = Svg_Attributes }
end
|
989
|
+
|
990
|
+
# Collection-level scrubbing helpers: each method forwards the same call, with
# the same arguments, to every member of the collection.
class Elements
  def strip(allowed_tags=[]) # I completely route around this with the recursive_strip in Doc
    each do |node|
      node.strip(allowed_tags)
    end
  end

  def strip_attributes(safe=[])
    each do |node|
      node.strip_attributes(safe)
    end
  end

  def strip_style(ok_props=[], ok_keywords=[])
    each do |node|
      node.strip_style(ok_props, ok_keywords)
    end
  end
end
|
1003
|
+
|
1004
|
+
# Text nodes take part in the strip / strip_attributes calls made by Elements
# but have nothing to remove, so both methods are intentionally empty.
class Text
  # No-op; the argument is ignored.
  def strip(foo)
  end
  # No-op; the argument is ignored.
  def strip_attributes(foo)
  end
end
|
1010
|
+
# Comment nodes accept the same strip / strip_attributes calls as elements;
# both methods are intentionally empty.
class Comment
  # No-op; the argument is ignored.
  def strip(foo)
  end
  # No-op; the argument is ignored.
  def strip_attributes(foo)
  end
end
|
1016
|
+
# BogusETag nodes accept the same strip / strip_attributes calls as elements;
# both methods are intentionally empty.
class BogusETag
  # No-op; the argument is ignored.
  def strip(foo)
  end
  # No-op; the argument is ignored.
  def strip_attributes(foo)
  end
end
|
1022
|
+
|
1023
|
+
# Scrubbing helpers for a single element.
class Elem
  # Asks every child node to decode its entities.
  def decode_entities
    children.each { |x| x.decode_entities }
  end

  # Replaces this element with the markup of its children (if it has any).
  def cull
    swap(children.to_s) if children
  end

  # Culls the element when strip_removes? says so.
  #
  # allowed_tags is accepted so the call made by Elements#strip (which passes
  # one argument) does not raise ArgumentError; it is not consulted here.
  def strip(allowed_tags = [])
    cull if strip_removes?
  end

  # Removes every attribute whose name is not whitelisted.
  #
  # safe:: attribute names to keep. When it is nil or empty (the default, and
  #        what Elements#strip_attributes passes when called without
  #        arguments) the module-wide Acceptable_Attributes list is used.
  def strip_attributes(safe = [])
    return if attributes.nil?

    allowed = (safe.nil? || safe.empty?) ? Acceptable_Attributes : safe
    attributes.each do |atr|
      remove_attribute(atr[0]) unless allowed.include?(atr[0])
    end
  end

  # Truthy when the element has a "type" attribute mentioning script or css.
  def strip_removes?
    # I'm sure there are others that should be ripped instead of stripped
    attributes && attributes['type'] =~ /script|css/
  end
end
|
1055
|
+
end
|
1056
|
+
|
1057
|
+
module FeedParser
|
1058
|
+
Version = "0.1aleph_naught"
|
1059
|
+
|
1060
|
+
License = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
|
1061
|
+
|
1062
|
+
Redistribution and use in source and binary forms, with or without modification,
|
1063
|
+
are permitted provided that the following conditions are met:
|
1064
|
+
|
1065
|
+
* Redistributions of source code must retain the above copyright notice,
|
1066
|
+
this list of conditions and the following disclaimer.
|
1067
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
1068
|
+
this list of conditions and the following disclaimer in the documentation
|
1069
|
+
and/or other materials provided with the distribution.
|
1070
|
+
|
1071
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
1072
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
1073
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
1074
|
+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
1075
|
+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
1076
|
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
1077
|
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
1078
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
1079
|
+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
1080
|
+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
1081
|
+
POSSIBILITY OF SUCH DAMAGE."""
|
1082
|
+
|
1083
|
+
Author = "Jeff Hodges <http://somethingsimilar.com>"
|
1084
|
+
Copyright_Holder = "Mark Pilgrim <http://diveintomark.org/>"
|
1085
|
+
Contributors = [ "Jason Diamond <http://injektilo.org/>",
|
1086
|
+
"John Beimler <http://john.beimler.org/>",
|
1087
|
+
"Fazal Majid <http://www.majid.info/mylos/weblog/>",
|
1088
|
+
"Aaron Swartz <http://aaronsw.com/>",
|
1089
|
+
"Kevin Marks <http://epeus.blogspot.com/>"
|
1090
|
+
]
|
1091
|
+
# HTTP "User-Agent" header to send to servers when downloading feeds.
|
1092
|
+
# If you are embedding feedparser in a larger application, you should
|
1093
|
+
# change this to your application name and URL.
|
1094
|
+
# Interpolate the Version constant defined at the top of this module. The
# instance variable @version is never assigned at this point, so it rendered
# as an empty string.
USER_AGENT = "UniversalFeedParser/%s +http://feedparser.org/" % Version
|
1095
|
+
|
1096
|
+
# HTTP "Accept" header to send to servers when downloading feeds. If you don't
|
1097
|
+
# want to send an Accept header, set this to nil.
|
1098
|
+
ACCEPT_HEADER = "application/atom+xml,application/rdf+xml,application/rss+xml,application/x-netcdf,application/xml;q=0.9,text/xml;q=0.2,*/*;q=0.1"
|
1099
|
+
|
1100
|
+
|
1101
|
+
# If you want feedparser to automatically run HTML markup through HTML Tidy, set
|
1102
|
+
# this to true. Requires mxTidy <http://www.egenix.com/files/python/mxTidy.html>
|
1103
|
+
# or utidylib <http://utidylib.berlios.de/>.
|
1104
|
+
TIDY_MARKUP = false #FIXME untranslated
|
1105
|
+
|
1106
|
+
# List of Python interfaces for HTML Tidy, in order of preference. Only useful
|
1107
|
+
# if TIDY_MARKUP = true
|
1108
|
+
PREFERRED_TIDY_INTERFACES = ["uTidy", "mxTidy"] #FIXME untranslated
|
1109
|
+
|
1110
|
+
# The original Python import. I'm using it to help translate
|
1111
|
+
#import sgmllib, re, sys, copy, urlparse, time, rfc822, types, cgi, urllib, urllib2
|
1112
|
+
|
1113
|
+
|
1114
|
+
|
1115
|
+
# ---------- don't touch these ----------
|
1116
|
+
# Common ancestor of the three exception types declared right below it.
class ThingsNobodyCaresAboutButMe < Exception; end

# Exception type; subclass of ThingsNobodyCaresAboutButMe.
class CharacterEncodingOverride < ThingsNobodyCaresAboutButMe; end

# Exception type; subclass of ThingsNobodyCaresAboutButMe.
class CharacterEncodingUnknown < ThingsNobodyCaresAboutButMe; end

# Exception type; subclass of ThingsNobodyCaresAboutButMe.
class NonXMLContentType < ThingsNobodyCaresAboutButMe; end

# Stand-alone exception type (derives directly from Exception).
class UndeclaredNamespace < Exception; end
|
1126
|
+
|
1127
|
+
|
1128
|
+
# Maps the short version identifiers used internally (the values assigned to
# @version) to human-readable feed format names.
SUPPORTED_VERSIONS = {
  ''        => 'unknown',
  'rss090'  => 'RSS 0.90',
  'rss091n' => 'RSS 0.91 (Netscape)',
  'rss091u' => 'RSS 0.91 (Userland)',
  'rss092'  => 'RSS 0.92',
  'rss093'  => 'RSS 0.93',
  'rss094'  => 'RSS 0.94',
  'rss20'   => 'RSS 2.0',
  'rss10'   => 'RSS 1.0',
  'rss'     => 'RSS (unknown version)',
  'atom01'  => 'Atom 0.1',
  'atom02'  => 'Atom 0.2',
  'atom03'  => 'Atom 0.3',
  'atom10'  => 'Atom 1.0',
  'atom'    => 'Atom (unknown version)',
  'cdf'     => 'CDF',
  'hotrss'  => 'Hot RSS'
}
|
1146
|
+
# A Hash whose keys can be reached under several names.
#
# The same piece of feed information (for example "when was this feed last
# updated?") is called different things in different feed formats. This
# class lets a caller use either the name they expect from the format being
# parsed or a single common name. @@keymap maps an alternative name to the
# key (or list of candidate keys) under which the value is actually stored;
# #[] and #[]= consult it on every access. Unknown methods are treated as
# key lookups/assignments, so `dict.title` reads `dict['title']`.
class FeedParserDict < Hash
  @@keymap = {
    'channel'          => 'feed',
    'items'            => 'entries',
    'guid'             => 'id',
    'date'             => 'updated',
    'date_parsed'      => 'updated_parsed',
    'description'      => ['subtitle', 'summary'],
    'url'              => ['href'],
    'modified'         => 'updated',
    'modified_parsed'  => 'updated_parsed',
    'issued'           => 'published',
    'issued_parsed'    => 'published_parsed',
    'copyright'        => 'rights',
    'copyright_detail' => 'rights_detail',
    'tagline'          => 'subtitle',
    'tagline_detail'   => 'subtitle_detail'
  }

  # Hash already has an #entries method of its own, so method_missing would
  # never see this name; it has to be overridden explicitly.
  def entries
    self['entries']
  end

  # Builds the dict, optionally pre-filled.
  #
  # pairs - nil, a list of [key, value] pairs (stored through #[]=, so keys
  #         are mapped), or a plain Hash (merged as-is, keys are NOT mapped).
  def initialize(pairs=nil)
    if pairs.instance_of?(Array) and pairs[0].instance_of?(Array) and pairs[0].length == 2
      pairs.each { |name, value| self[name] = value }
    elsif pairs.instance_of?(Hash)
      merge!(pairs)
    end
  end

  # Reads a value, resolving alternative key names.
  #
  # 'category' and 'categories' are synthesized from the 'tags' entry. For
  # keys mapped to a list, the first candidate that is present wins.
  def [](key)
    return self['tags'][0]['term'] if key == 'category'
    return self['tags'].collect { |tag| [tag['scheme'], tag['term']] } if key == 'categories'

    realkey = @@keymap[key] || key
    if realkey.class == Array
      realkey.each { |candidate| return self[candidate] if has_key?(candidate) }
    end
    # A value stored under the original key is preferred over the one
    # stored under the mapped key.
    return super(key) if has_key?(key)
    super(realkey)
  end

  # Stores a value under the mapped key (the first candidate when the
  # mapping is a list).
  def []=(key, value)
    if @@keymap.key?(key)
      mapped = @@keymap[key]
      key = mapped.class == Array ? mapped[0] : mapped
    end
    super(key, value)
  end

  # Turns `dict.name` into `dict['name']` and `dict.name = v` into
  # `dict['name'] = v`. Names ending in `!` or `?`, or starting with `_`,
  # are rejected with NoMethodError.
  def method_missing(msym, *args)
    methodname = msym.to_s
    if methodname.end_with?('=')
      return self[methodname.chop] = args[0]
    end
    is_plain_name = !(methodname.end_with?('!') || methodname.end_with?('?') || methodname.start_with?('_'))
    return self[methodname] if is_plain_name # FIXME implement with private
    raise NoMethodError, "whoops, we don't know about the attribute or method called `#{methodname}' for #{self}:#{self.class}"
  end
end
|
1234
|
+
|
1235
|
+
|
1236
|
+
|
1237
|
+
|
1238
|
+
module FeedParserMixin
|
1239
|
+
attr_accessor :feeddata, :version, :namespacesInUse, :date_handlers
|
1240
|
+
|
1241
|
+
# Resets all parser state. Must run before any tag handler is invoked.
#
# baseuri  - initial base URI for relative references ('' when nil).
# baselang - initial language; when given it is also stored as the feed's
#            'language' with underscores turned into hyphens.
# encoding - character encoding of the document being parsed.
def startup(baseuri=nil, baselang=nil, encoding='utf-8')
  $stderr << "initializing FeedParser\n" if $debug

  # Known namespace URIs and the prefix each one is normalized to.
  @namespaces = {
    '' => '',
    'http://backend.userland.com/rss' => '',
    'http://blogs.law.harvard.edu/tech/rss' => '',
    'http://purl.org/rss/1.0/' => '',
    'http://my.netscape.com/rdf/simple/0.9/' => '',
    'http://example.com/newformat#' => '',
    'http://example.com/necho' => '',
    'http://purl.org/echo/' => '',
    'uri/of/echo/namespace#' => '',
    'http://purl.org/pie/' => '',
    'http://purl.org/atom/ns#' => '',
    'http://www.w3.org/2005/Atom' => '',
    'http://purl.org/rss/1.0/modules/rss091#' => '',
    'http://webns.net/mvcb/' => 'admin',
    'http://purl.org/rss/1.0/modules/aggregation/' => 'ag',
    'http://purl.org/rss/1.0/modules/annotate/' => 'annotate',
    'http://media.tangent.org/rss/1.0/' => 'audio',
    'http://backend.userland.com/blogChannelModule' => 'blogChannel',
    'http://web.resource.org/cc/' => 'cc',
    'http://backend.userland.com/creativeCommonsRssModule' => 'creativeCommons',
    'http://purl.org/rss/1.0/modules/company' => 'co',
    'http://purl.org/rss/1.0/modules/content/' => 'content',
    'http://my.theinfo.org/changed/1.0/rss/' => 'cp',
    'http://purl.org/dc/elements/1.1/' => 'dc',
    'http://purl.org/dc/terms/' => 'dcterms',
    'http://purl.org/rss/1.0/modules/email/' => 'email',
    'http://purl.org/rss/1.0/modules/event/' => 'ev',
    'http://rssnamespace.org/feedburner/ext/1.0' => 'feedburner',
    'http://freshmeat.net/rss/fm/' => 'fm',
    'http://xmlns.com/foaf/0.1/' => 'foaf',
    'http://www.w3.org/2003/01/geo/wgs84_pos#' => 'geo',
    'http://postneo.com/icbm/' => 'icbm',
    'http://purl.org/rss/1.0/modules/image/' => 'image',
    'http://www.itunes.com/DTDs/PodCast-1.0.dtd' => 'itunes',
    'http://example.com/DTDs/PodCast-1.0.dtd' => 'itunes',
    'http://purl.org/rss/1.0/modules/link/' => 'l',
    'http://search.yahoo.com/mrss' => 'media',
    'http://madskills.com/public/xml/rss/module/pingback/' => 'pingback',
    'http://prismstandard.org/namespaces/1.2/basic/' => 'prism',
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => 'rdf',
    'http://www.w3.org/2000/01/rdf-schema#' => 'rdfs',
    'http://purl.org/rss/1.0/modules/reference/' => 'ref',
    'http://purl.org/rss/1.0/modules/richequiv/' => 'reqv',
    'http://purl.org/rss/1.0/modules/search/' => 'search',
    'http://purl.org/rss/1.0/modules/slash/' => 'slash',
    'http://schemas.xmlsoap.org/soap/envelope/' => 'soap',
    'http://purl.org/rss/1.0/modules/servicestatus/' => 'ss',
    'http://hacks.benhammersley.com/rss/streaming/' => 'str',
    'http://purl.org/rss/1.0/modules/subscription/' => 'sub',
    'http://purl.org/rss/1.0/modules/syndication/' => 'sy',
    'http://purl.org/rss/1.0/modules/taxonomy/' => 'taxo',
    'http://purl.org/rss/1.0/modules/threading/' => 'thr',
    'http://purl.org/rss/1.0/modules/textinput/' => 'ti',
    'http://madskills.com/public/xml/rss/module/trackback/' => 'trackback',
    'http://wellformedweb.org/commentAPI/' => 'wfw',
    'http://purl.org/rss/1.0/modules/wiki/' => 'wiki',
    'http://www.w3.org/1999/xhtml' => 'xhtml',
    'http://www.w3.org/XML/1998/namespace' => 'xml',
    'http://www.w3.org/1999/xlink' => 'xlink',
    'http://schemas.pocketsoap.com/rss/myDescModule/' => 'szf'
  }

  # Same table keyed by lower-cased URI, for case-insensitive matching.
  @matchnamespaces = {}
  @namespaces.each { |uri, prefix| @matchnamespaces[uri.downcase] = prefix }

  @can_be_relative_uri = ['link', 'id', 'wfw_comment', 'wfw_commentrss', 'docs', 'url', 'href', 'comments', 'license', 'icon', 'logo']
  @can_contain_relative_uris = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']
  @can_contain_dangerous_markup = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']
  @html_types = ['text/html', 'application/xhtml+xml']

  @feeddata = FeedParserDict.new # feed-level data
  @encoding = encoding           # character encoding
  @entries = []                  # list of entry-level data
  @version = ''                  # feed type/version, see SUPPORTED_VERSIONS
  @namespacesInUse = {}          # hash of namespaces defined by the feed

  # The following are used internally to track state;
  # this is really out of control and should be refactored.
  @infeed = false
  @inentry = false
  @incontent = 0 # A nesting counter, not a flag: see pushContent and popContent.
  @intextinput = false
  @inimage = false
  @inauthor = false
  @incontributor = false
  @inpublisher = false
  @insource = false
  @sourcedata = FeedParserDict.new
  @contentparams = FeedParserDict.new
  @summaryKey = nil
  @namespacemap = {}
  @elementstack = []
  @basestack = []
  @langstack = []
  @baseuri = baseuri || ''
  @lang = baselang || nil
  @feeddata['language'] = baselang.gsub('_','-') if baselang

  # Date parsers, listed in the order they are stored.
  @date_handlers = [
    :_parse_date_rfc822,
    :_parse_date_hungarian, :_parse_date_greek, :_parse_date_mssql,
    :_parse_date_nate, :_parse_date_onblog, :_parse_date_w3dtf, :_parse_date_iso8601
  ]
  $stderr << "Leaving startup\n" if $debug # My addition
end
|
1348
|
+
|
1349
|
+
# Handles every opening tag reported by the underlying parser.
#
# Normalizes the attributes, tracks xml:base / xml:lang and namespace
# declarations, passes inline XHTML through as literal text, and finally
# dispatches to a `_start_<prefix>_<name>` handler, falling back to a plain
# push when no such handler exists.
#
# tag    - element name, possibly prefixed ("dc:creator").
# attrsd - attributes as a Hash or as a list of [name, value] pairs.
def unknown_starttag(tag, attrsd)
  $stderr << "start #{tag} with #{attrsd}\n" if $debug
  # LooseFeedParser needs this because SGMLParser sends attrs as a
  # list of lists (like [['type','text/html'],['mode','escaped']]).
  attrsd = Hash[*attrsd.flatten] if attrsd.class == Array

  # Normalize attrs: every key is lower-cased, and the values of 'rel' and
  # 'type' are lower-cased too. The check is on the KEY, and a lower-cased
  # copy is stored so the caller's string is never mutated.
  attrsD = {}
  attrsd.each do |old_k, value|
    k = old_k.downcase
    value = value.downcase if value and ['rel', 'type'].include?(k)
    attrsD[k] = value
  end

  # track xml:base and xml:lang
  baseuri = attrsD['xml:base'] || attrsD['base'] || @baseuri
  @baseuri = urljoin(@baseuri, baseuri)
  lang = attrsD['xml:lang'] || attrsD['lang']
  if lang == ''
    # xml:lang could be explicitly set to '', we need to capture that
    lang = nil
  elsif lang.nil?
    # if no xml:lang is specified, use parent lang
    lang = @lang
  end
  if lang and not lang.empty?
    if ['feed', 'rss', 'rdf:RDF'].include?(tag)
      @feeddata['language'] = lang.gsub('_','-')
    end
  end
  @lang = lang
  @basestack << @baseuri
  @langstack << lang

  # track namespaces
  attrsd.each do |prefix, uri|
    if /^xmlns:/ =~ prefix # prefix begins with xmlns:
      trackNamespace(prefix[6..-1], uri)
    elsif prefix == 'xmlns'
      trackNamespace(nil, uri)
    end
  end

  # track inline content
  if @incontent != 0 and @contentparams.has_key?('type') and not ( /xml$/ =~ (@contentparams['type'] || 'xml') )
    # element declared itself as escaped markup, but isn't really
    @contentparams['type'] = 'application/xhtml+xml'
  end
  if @incontent != 0 and @contentparams['type'] == 'application/xhtml+xml'
    # Note: probably shouldn't simply recreate localname here, but
    # our namespace handling isn't actually 100% correct in cases where
    # the feed redefines the default namespace (which is actually
    # the usual case for inline content, thanks Sam), so here we
    # cheat and just reconstruct the element based on localname
    # because that compensates for the bugs in our namespace handling.
    # This will horribly munge inline content with non-empty qnames,
    # but nobody actually does that, so I'm not fixing it.
    tag = tag.split(':')[-1]
    attrsA = attrsd.to_a.collect{|l| "#{l[0]}=\"#{l[1]}\""}
    attrsS = ' '+attrsA.join(' ')
    return handle_data("<#{tag}#{attrsS}>", false)
  end

  # match namespaces
  if /:/ =~ tag
    prefix, suffix = tag.split(':', 2)
  else
    prefix, suffix = '', tag
  end
  prefix = @namespacemap[prefix] || prefix
  if prefix and not prefix.empty?
    prefix = prefix + '_'
  end

  # Special hack for better tracking of empty textinput/image elements in
  # illformed feeds: an unprefixed tag that cannot belong to a textinput
  # (resp. image) means that element has ended.
  if (prefix.nil? or prefix.empty?) and not (['title', 'link', 'description','name'].include?(tag))
    @intextinput = false
  end
  if (prefix.nil? or prefix.empty?) and not (['title', 'link', 'description', 'url', 'href', 'width', 'height'].include?(tag))
    @inimage = false
  end

  # call special handler (if defined) or default handler
  begin
    return send('_start_'+prefix+suffix, attrsD)
  rescue NoMethodError
    return push(prefix + suffix, true)
  end
end # End unknown_starttag
|
1442
|
+
|
1443
|
+
# Handles every closing tag reported by the underlying parser.
#
# Dispatches to a `_end_<prefix>_<name>` handler (or a plain pop when there
# is none), echoes the closing tag into inline XHTML content, and restores
# the xml:base / xml:lang values of the parent element.
#
# tag - element name, possibly prefixed ("dc:creator").
def unknown_endtag(tag)
  $stderr << "end #{tag}\n" if $debug
  # match namespaces
  if tag.index(':')
    prefix, suffix = tag.split(':',2)
  else
    prefix, suffix = '', tag
  end
  prefix = @namespacemap[prefix] || prefix
  if prefix and not prefix.empty?
    prefix = prefix + '_'
  end

  # call special handler (if defined) or default handler
  begin
    send('_end_' + prefix + suffix) # NOTE no return here! do not add it!
  rescue NoMethodError
    pop(prefix + suffix)
  end

  # Track inline content. This mirrors the check in unknown_starttag: only
  # content whose declared type does NOT end in "xml" is relabelled.
  if @incontent != 0 and @contentparams.has_key?('type') and not ( /xml$/ =~ (@contentparams['type'] || 'xml') )
    # element declared itself as escaped markup, but it isn't really
    @contentparams['type'] = 'application/xhtml+xml'
  end
  if @incontent != 0 and @contentparams['type'] == 'application/xhtml+xml'
    tag = tag.split(':')[-1]
    handle_data("</#{tag}>", false)
  end

  # track xml:base and xml:lang going out of scope
  if @basestack and not @basestack.empty?
    @basestack.pop
    if @basestack and @basestack[-1] and not (@basestack.empty? or @basestack[-1].empty?)
      @baseuri = @basestack[-1]
    end
  end
  if @langstack and not @langstack.empty?
    @langstack.pop
    if @langstack and not @langstack.empty? # and @langstack[-1] and not @langstack.empty?
      @lang = @langstack[-1]
    end
  end
end
|
1487
|
+
|
1488
|
+
# LooseParserOnly.
# Called for each character reference, e.g. for '&#160;', ref will be '160'.
#
# References to the five XML-special characters (", &, ', <, >) are kept as
# references; every other one is converted to the character it denotes.
# The result is appended to the text of the element currently on top of
# the element stack; nothing happens when the stack is empty.
def handle_charref(ref)
  $stderr << "entering handle_charref with #{ref}\n" if $debug
  return if @elementstack.nil? or @elementstack.empty?
  # Work on a lower-cased copy: downcase! would modify the caller's string
  # and raise on a frozen one.
  ref = ref.downcase
  chars = ['34', '38', '39', '60', '62', 'x22', 'x26', 'x27', 'x3c', 'x3e']
  if chars.include?(ref)
    text = "&##{ref};"
  else
    # A leading 'x' marks a hexadecimal code point.
    if ref[0..0] == 'x'
      c = (ref[1..-1]).to_i(16)
    else
      c = ref.to_i
    end
    text = uconvert(unichr(c),'unicode')
  end
  @elementstack[-1][2] << text
end
|
1507
|
+
|
1508
|
+
# LooseParserOnly.
# Called for each entity reference, e.g. for '&copy;', ref will be 'copy'.
#
# The five predefined XML entities are kept as references; anything else is
# decoded through HTMLEntities. The text is appended to the element on top
# of the element stack; nothing happens when the stack is empty.
def handle_entityref(ref)
  return if @elementstack.nil? or @elementstack.empty?
  $stderr << "entering handle_entityref with #{ref}\n" if $debug
  predefined = ['lt', 'gt', 'quot', 'amp', 'apos'].include?(ref)
  text = predefined ? "&#{ref};" : HTMLEntities::decode_entities("&#{ref};")
  @elementstack[-1][2] << text
end
|
1522
|
+
|
1523
|
+
# Called for each block of plain text, i.e. outside of any tag and not
# containing any character or entity references.
#
# text   - the text to append to the element on top of the element stack.
# escape - when true and the current content type is XHTML, the text is
#          run through String#to_xs before being stored.
def handle_data(text, escape=true)
  return if @elementstack.nil? or @elementstack.empty?
  needs_escaping = escape && @contentparams['type'] == 'application/xhtml+xml'
  text = text.to_xs if needs_escaping
  @elementstack[-1][2] << text
end
|
1532
|
+
|
1533
|
+
# Called for each comment, e.g. <!-- insert message here -->.
# Comments are deliberately ignored.
def handle_comment(comment)
  nil
end
|
1536
|
+
|
1537
|
+
# Parser callback that receives `text`; it is deliberately ignored.
def handle_pi(text)
  nil
end
|
1539
|
+
|
1540
|
+
# Parser callback that receives `text`; it is deliberately ignored.
def handle_decl(text)
  nil
end
|
1542
|
+
|
1543
|
+
# For LooseFeedParser: handles the declaration starting at offset i of
# @rawdata and returns the offset just past it.
#
# A CDATA section has its content escaped with String#to_xs and stored via
# handle_data; an unterminated section runs to the end of the data. Any
# other declaration is skipped up to its closing '>'.
def parse_declaration(i)
  $stderr << "entering parse_declaration\n" if $debug
  unless @rawdata[i...i+9] == '<![CDATA['
    return @rawdata.index(/>/,i).to_i + 1
  end
  body_start = i + 9
  body_end = @rawdata.index(/\]\]>/u, body_start) || @rawdata.length
  handle_data(@rawdata[body_start...body_end].to_xs, false)
  body_end + 3
end
|
1556
|
+
|
1557
|
+
# Expands the short content type names 'text', 'html' and 'xhtml' to full
# MIME types; any other value is returned lower-cased.
#
# The argument itself is left untouched (the previous in-place downcase!
# altered the caller's string and raised on frozen strings).
#
# contentType - a String such as 'html' or 'Text/HTML'.
#
# Returns the lower-cased, expanded content type.
def mapContentType(contentType)
  normalized = contentType.downcase
  case normalized
  when 'text'
    'text/plain'
  when 'html'
    'text/html'
  when 'xhtml'
    'application/xhtml+xml'
  else
    normalized
  end
end
|
1569
|
+
|
1570
|
+
# Records a namespace declaration found on an element.
#
# While no feed version is known yet, certain namespace URIs identify the
# feed type. Any backend.userland.com RSS namespace is folded into one
# canonical URI. A known URI maps the declared prefix to the standard
# prefix; an unknown one is simply recorded under its declared prefix.
#
# prefix - the declared prefix, or nil for the default namespace.
# uri    - the namespace URI as written in the document.
def trackNamespace(prefix, uri)
  loweruri = uri.downcase.strip

  if @version.nil? or @version.empty?
    if prefix.nil? and loweruri == 'http://my.netscape.com/rdf/simple/0.9/'
      @version = 'rss090'
    elsif loweruri == 'http://purl.org/rss/1.0/'
      @version = 'rss10'
    elsif loweruri == 'http://www.w3.org/2005/atom'
      @version = 'atom10'
    end
  end

  if /backend\.userland\.com\/rss/ =~ loweruri
    # match any backend.userland.com namespace
    uri = 'http://backend.userland.com/rss'
    loweruri = uri
  end

  if @matchnamespaces.has_key?(loweruri)
    standard_prefix = @matchnamespaces[loweruri]
    @namespacemap[prefix] = standard_prefix
    @namespacesInUse[standard_prefix] = uri
  else
    @namespacesInUse[prefix || ''] = uri
  end
end
|
1591
|
+
|
1592
|
+
# Resolves uri against the current base URI ('' when none is set).
def resolveURI(uri)
  base = @baseuri || ''
  urljoin(base, uri)
end
|
1595
|
+
|
1596
|
+
# Returns data unchanged; element is not used by this implementation.
def decodeEntities(element, data)
  data
end
|
1599
|
+
|
1600
|
+
# Opens a new frame on the element stack: the element name, whether its
# text should be post-processed by pop, and an empty list of text pieces.
def push(element, expectingText)
  frame = [element, expectingText, []]
  @elementstack << frame
end
|
1603
|
+
|
1604
|
+
# Closes the frame on top of the element stack and returns its text.
#
# Nothing is popped (and nil is returned) when the stack is empty or its
# top frame belongs to a different element. For frames that expect text
# the collected pieces are joined and then, in order: base64-decoded,
# resolved as a URI, entity-decoded, cleaned of relative URIs and dangerous
# markup (HTML types only), re-encoded to utf-8, and finally stored in the
# current entry or feed.
#
# element         - name the top frame must have.
# stripWhitespace - strip leading/trailing whitespace from the text.
def pop(element, stripWhitespace=true)
  return if @elementstack.nil? or @elementstack.empty?
  return if @elementstack[-1][0] != element
  element, expectingText, pieces = @elementstack.pop
  # pieces is a plain string when a handler replaced the list directly.
  output = pieces.class == Array ? pieces.join('') : pieces
  output.strip! if stripWhitespace
  return output if not expectingText

  # decode base64 content
  if @contentparams['base64']
    out64 = Base64::decode64(output) # a.k.a. [output].unpack('m')[0]
    output = out64 unless output.empty? or out64.empty?
  end

  # resolve relative URIs
  if @can_be_relative_uri.include?(element) and output and not output.empty?
    output = resolveURI(output)
  end

  # decode entities within embedded markup
  output = decodeEntities(element, output) unless @contentparams['base64']

  # remove temporary cruft from contentparams
  @contentparams.delete('mode')
  @contentparams.delete('base64')

  if @html_types.include?(mapContentType(@contentparams['type'] || 'text/html'))
    # resolve relative URIs within embedded markup
    if @can_contain_relative_uris.include?(element)
      output = FeedParser.resolveRelativeURIs(output, @baseuri, @encoding)
    end
    # sanitize embedded markup
    if @can_contain_dangerous_markup.include?(element)
      output = FeedParser.sanitizeHTML(output, @encoding)
    end
  end

  if @encoding and not @encoding.empty? and @encoding != 'utf-8'
    output = uconvert(output, @encoding, 'utf-8')
    # FIXME I turn everything into utf-8, not unicode, originally because REXML was being used but now because I haven't tested it out yet.
  end

  # categories/tags/keywords/whatever are handled in _end_category
  return output if element == 'category'

  # store output in appropriate place(s)
  if @inentry and not @insource
    entry = @entries[-1]
    if element == 'content'
      entry[element] ||= []
      detail = Marshal.load(Marshal.dump(@contentparams)) # deepcopy
      detail['value'] = output
      entry[element] << detail
    elsif element == 'link'
      entry[element] = output
      if output and not output.empty?
        entry['links'][-1]['href'] = output
      end
    else
      element = 'summary' if element == 'description'
      entry[element] = output
      if @incontent != 0
        detail = Marshal.load(Marshal.dump(@contentparams))
        detail['value'] = output
        entry[element + '_detail'] = detail
      end
    end
  elsif (@infeed or @insource) and not @intextinput and not @inimage
    context = getContext()
    element = 'subtitle' if element == 'description'
    context[element] = output
    if element == 'link'
      context['links'][-1]['href'] = output
    elsif @incontent != 0
      detail = Marshal.load(Marshal.dump(@contentparams))
      detail['value'] = output
      context[element + '_detail'] = detail
    end
  end
  output
end
|
1696
|
+
|
1697
|
+
# Opens a content-bearing element: bumps the content nesting counter,
# rebuilds @contentparams (type, language, base, base64) for it and pushes
# the element onto the element stack.
#
# tag                - element name to push.
# attrsD             - the element's attributes; 'type' overrides the default.
# defaultContentType - content type used when no 'type' attribute is given.
# expectingText      - forwarded to push.
def pushContent(tag, attrsD, defaultContentType, expectingText)
  @incontent += 1 # Yes, I hate this.
  content_type = mapContentType(attrsD['type'] || defaultContentType)
  @contentparams = FeedParserDict.new({'type' => content_type,'language' => @lang,'base' => @baseuri})
  @contentparams['base64'] = isBase64(attrsD, @contentparams)
  push(tag, expectingText)
end
|
1704
|
+
|
1705
|
+
# Closes a content-bearing element opened with pushContent: pops it,
# decrements the content nesting counter and empties @contentparams.
#
# Returns whatever pop returned for the element.
def popContent(tag)
  popped_value = pop(tag)
  @incontent -= 1
  @contentparams.clear
  popped_value
end
|
1711
|
+
|
1712
|
+
# Rewrites the namespace prefix of a qualified name to the standard prefix
# recorded in @namespacemap; names without a colon, or with an unmapped
# prefix, come back unchanged.
#
# name - a possibly prefixed name such as "dublin:creator".
#
# Returns the name with its prefix normalized.
def mapToStandardPrefix(name)
  colonpos = name.index(':')
  if colonpos
    # Exclusive range: with the colon at position 0 the prefix must be ''
    # (the inclusive form 0..colonpos-1 would be 0..-1, the whole name).
    prefix = name[0...colonpos]
    suffix = name[colonpos+1..-1]
    prefix = @namespacemap[prefix] || prefix
    name = prefix + ':' + suffix
  end
  return name
end
|
1722
|
+
|
1723
|
+
# Looks up an attribute by name after normalizing the name's namespace
# prefix with mapToStandardPrefix.
def getAttribute(attrsD, name)
  standard_name = mapToStandardPrefix(name)
  attrsD[standard_name]
end
|
1726
|
+
|
1727
|
+
# Decides whether an element's content should be treated as base64.
#
# It is when the element says so (mode="base64"), or when its content type
# is neither text/* nor an XML type (ending in +xml or /xml).
#
# Returns true or false.
def isBase64(attrsD, contentparams)
  return true if attrsD['mode'] == 'base64'
  textual = /(^text\/)|(\+xml$)|(\/xml$)/ =~ contentparams['type']
  textual ? false : true
end
|
1734
|
+
|
1735
|
+
# Normalizes the many spellings of a link target: the first of 'url',
# 'uri' and 'href' that is set becomes 'href', and 'url'/'uri' are removed.
# The hash is modified in place and returned; it is left untouched when
# none of the three is set.
def itsAnHrefDamnIt(attrsD)
  href = attrsD['url'] || attrsD['uri'] || attrsD['href']
  return attrsD unless href
  ['url', 'uri'].each { |synonym| attrsD.delete(synonym) }
  attrsD['href'] = href
  attrsD
end
|
1744
|
+
|
1745
|
+
|
1746
|
+
# Stores value under key in the current context unless that key already
# holds a truthy value. Returns the value that ends up stored.
def _save(key, value)
  getContext()[key] ||= value
end
|
1750
|
+
|
1751
|
+
# Start handler for <rss>: derives the feed version from the 'version'
# attribute unless a version has already been determined.
#
# 0.91-0.94 map to their own identifiers, anything starting with "2." is
# 'rss20', and everything else is the generic 'rss'.
def _start_rss(attrsD)
  return if @version and not @version.empty?

  versionmap = {
    '0.91' => 'rss091u',
    '0.92' => 'rss092',
    '0.93' => 'rss093',
    '0.94' => 'rss094'
  }
  declared = attrsD['version'] || ''
  known = versionmap[declared]
  @version =
    if known and not known.empty?
      known
    elsif /^2\./ =~ declared
      'rss20'
    else
      'rss'
    end
end
|
1770
|
+
|
1771
|
+
# Start handler for <dlhottitles>: its presence alone identifies the feed
# as Hot RSS, whatever version was set before. attrsD is not used.
def _start_dlhottitles(attrsD)
  @version = 'hotrss'
end
|
1774
|
+
|
1775
|
+
# Start handler for <channel> (and <feedinfo>): marks the parser as being
# inside the feed-level element and processes the CDF-style attributes.
def _start_channel(attrsD)
  @infeed = true
  _cdf_common(attrsD)
end
alias _start_feedinfo _start_channel
|
1780
|
+
|
1781
|
+
# Handles attributes that carry what other formats put in child elements:
# 'lastmod' is processed as if it were a <modified> element and 'href' as
# if it were a <link> element, by running the matching start/end handlers
# with the attribute value planted as the element's text.
def _cdf_common(attrsD)
  if attrsD.key?('lastmod')
    _start_modified({})
    # Replace the (empty) list of text pieces with the attribute value.
    @elementstack[-1][-1] = attrsD['lastmod']
    _end_modified
  end
  if attrsD.key?('href')
    _start_link({})
    @elementstack[-1][-1] = attrsD['href']
    _end_link
  end
end
|
1793
|
+
|
1794
|
+
# Start handler for <feed>: marks the parser as being inside the feed-level
# element and, unless a version is already known, derives the Atom version
# from the 'version' attribute (0.1, 0.2, 0.3), defaulting to the generic
# 'atom'.
def _start_feed(attrsD)
  @infeed = true
  versionmap = {'0.1' => 'atom01',
                '0.2' => 'atom02',
                '0.3' => 'atom03'
  }

  if not @version or @version.empty?
    attr_version = attrsD['version']
    version = versionmap[attr_version]
    # Test the looked-up local, not @version: @version is known to be
    # empty inside this branch, so checking it always yielded 'atom'.
    if version and not version.empty?
      @version = version
    else
      @version = 'atom'
    end
  end
end
|
1811
|
+
|
1812
|
+
# End handler for <channel> (and <feed>): the parser is no longer inside
# the feed-level element.
def _end_channel
  @infeed = false
end
alias _end_feed _end_channel
|
1816
|
+
|
1817
|
+
# Start handler for <image>: flags that an image is being parsed, pushes
# the element and makes sure the current context has an 'image' dict.
def _start_image(attrsD)
  @inimage = true
  push('image', false)
  getContext()['image'] ||= FeedParserDict.new
end
|
1823
|
+
|
1824
|
+
# End handler for <image>: pops the element and clears the image flag.
def _end_image
  pop('image')
  @inimage = false
end
|
1828
|
+
|
1829
|
+
# Start handler for <textinput> / <textInput>: flags that a text input is
# being parsed, pushes the element and makes sure the current context has
# a 'textinput' dict.
def _start_textinput(attrsD)
  @intextinput = true
  push('textinput', false)
  getContext()['textinput'] ||= FeedParserDict.new
end
alias _start_textInput _start_textinput
|
1836
|
+
|
1837
|
+
# End handler for <textinput> / <textInput>: pops the element and clears
# the text-input flag.
def _end_textinput
  pop('textinput')
  @intextinput = false
end
alias _end_textInput _end_textinput
|
1842
|
+
|
1843
|
+
# Start handler for author-like elements (author, managingeditor,
# dc:author, dc:creator, itunes:author): flags that an author is being
# parsed and pushes an 'author' element that expects text.
def _start_author(attrsD)
  @inauthor = true
  push('author', true)
end
alias _start_managingeditor _start_author
alias _start_dc_author _start_author
alias _start_dc_creator _start_author
alias _start_itunes_author _start_author
|
1851
|
+
|
1852
|
+
# End handler for author-like elements: pops the 'author' element, clears
# the author flag and runs _sync_author_detail.
def _end_author
  pop('author')
  @inauthor = false
  _sync_author_detail()
end
alias _end_managingeditor _end_author
alias _end_dc_author _end_author
alias _end_dc_creator _end_author
alias _end_itunes_author _end_author
|
1861
|
+
|
1862
|
+
# Start handler for <itunes:owner>: the owner is recorded as the
# publisher, so the publisher flag is raised and a 'publisher' element is
# pushed (no text expected).
def _start_itunes_owner(attrsD)
  @inpublisher = true
  push('publisher', false)
end
|
1866
|
+
|
1867
|
+
# End handler for <itunes:owner>: pops the 'publisher' element, clears the
# publisher flag and runs _sync_author_detail for 'publisher'.
def _end_itunes_owner
  pop('publisher')
  @inpublisher = false
  _sync_author_detail('publisher')
end
|
1872
|
+
|
1873
|
+
# Start handler for <contributor>: raises the contributor flag, appends a
# fresh dict to the context's 'contributors' list (creating the list when
# needed) and pushes the element.
def _start_contributor(attrsD)
  @incontributor = true
  context = getContext()
  (context['contributors'] ||= []) << FeedParserDict.new
  push('contributor', false)
end
|
1880
|
+
|
1881
|
+
# End handler for <contributor>: pops the element and clears the
# contributor flag.
def _end_contributor
  pop('contributor')
  @incontributor = false
end
|
1885
|
+
|
1886
|
+
# Start handler for <dc:contributor>: like _start_contributor, but the
# element's own text is the contributor's name, so a 'name' element is
# pushed instead.
def _start_dc_contributor(attrsD)
  @incontributor = true
  context = getContext()
  (context['contributors'] ||= []) << FeedParserDict.new
  push('name', false)
end
|
1893
|
+
|
1894
|
+
# End handler for <dc:contributor>: finishes the 'name' element pushed by
# the start handler, then clears the contributor flag.
def _end_dc_contributor
  _end_name
  @incontributor = false
end
|
1898
|
+
|
1899
|
+
# Start handler for <name> / <itunes:name>: pushes a 'name' element
# (no text post-processing expected).
def _start_name(attrsD)
  push('name', false)
end
alias _start_itunes_name _start_name
|
1903
|
+
|
1904
|
+
# End handler for <name> / <itunes:name>: pops the name and files it under
# whatever is currently being parsed. The first match wins, in this order:
# publisher, author, contributor, text input.
def _end_name
  value = pop('name')
  case
  when @inpublisher
    _save_author('name', value, 'publisher')
  when @inauthor
    _save_author('name', value)
  when @incontributor
    _save_contributor('name', value)
  when @intextinput
    getContext()['textinput']['name'] = value
  end
end
alias _end_itunes_name _end_name
|
1918
|
+
|
1919
|
+
# Start handler for <width>: pushes a 'width' element
# (no text post-processing expected).
def _start_width(attrsD)
  push('width', false)
end
|
1922
|
+
|
1923
|
+
# End handler for <width>: converts the element's text to an integer and,
# when inside an image, stores it as the image's width.
def _end_width
  width = pop('width').to_i
  getContext()['image']['width'] = width if @inimage
end
|
1930
|
+
|
1931
|
+
# Start handler for <height>: pushes a 'height' element
# (no text post-processing expected).
def _start_height(attrsD)
  push('height', false)
end
|
1934
|
+
|
1935
|
+
# Closes a <height> element. The text is converted with to_i and, when inside
# an image, stored as the image's 'height'.
def _end_height
  height = pop('height').to_i
  getContext()['image']['height'] = height if @inimage
end
|
1942
|
+
|
1943
|
+
# Opens a <url> element (also <homepage> and <uri>); its text is collected
# under the key 'href'.
def _start_url(attrsD)
  push('href', true)
end
alias :_start_homepage :_start_url
alias :_start_uri :_start_url
|
1948
|
+
|
1949
|
+
# Closes a <url>/<homepage>/<uri> element and stores the collected href on
# whichever construct is open. The first active mode wins, in the order
# author, contributor, image ('href'), textinput ('link').
def _end_url
  value = pop('href')
  case
  when @inauthor
    _save_author('href', value)
  when @incontributor
    _save_contributor('href', value)
  when @inimage
    getContext()['image']['href'] = value
  when @intextinput
    getContext()['textinput']['link'] = value
  end
end
alias :_end_homepage :_end_url
alias :_end_uri :_end_url
|
1965
|
+
|
1966
|
+
# Opens an <email> element (also <itunes:email>) by pushing 'email' onto the
# element stack.
def _start_email(attrsD)
  push('email', false)
end
alias :_start_itunes_email :_start_email
|
1970
|
+
|
1971
|
+
# Closes an <email> element (also <itunes:email>) and records the address on
# the publisher, author or contributor being parsed (first active mode wins,
# in that order).
def _end_email
  value = pop('email')
  case
  when @inpublisher
    _save_author('email', value, 'publisher')
  when @inauthor
    _save_author('email', value)
  when @incontributor
    _save_contributor('email', value)
  end
end
alias :_end_itunes_email :_end_email
|
1982
|
+
|
1983
|
+
# Returns the dictionary that currently receives parsed values: the source
# data while inside a <source>, otherwise the most recent entry while inside
# an entry, otherwise the feed-level data.
def getContext
  return @sourcedata if @insource
  return @entries[-1] if @inentry
  return @feeddata
end
|
1993
|
+
|
1994
|
+
# Stores one field (key => value) in the "<prefix>_detail" dictionary of the
# current context, creating that dictionary if needed, then calls
# _sync_author_detail with its default key.
# NOTE(review): the sync is always run for 'author', even when prefix is
# 'publisher' -- confirm this is intended.
def _save_author(key, value, prefix='author')
  detail_key = prefix + '_detail'
  ctx = getContext()
  ctx[detail_key] ||= FeedParserDict.new
  ctx[detail_key][key] = value
  _sync_author_detail()
end
|
2000
|
+
|
2001
|
+
# Stores one field (key => value) on the most recently added contributor of
# the current context. If no contributors list exists yet, one holding a
# single empty FeedParserDict is created first.
def _save_contributor(key, value)
  ctx = getContext
  ctx['contributors'] ||= [FeedParserDict.new]
  ctx['contributors'][-1][key] = value
end
|
2006
|
+
|
2007
|
+
# Keeps context[key] (a display string) and context["#{key}_detail"] (a
# dictionary with 'name' and 'email') consistent with each other.
#
# * When the detail dictionary is present and non-empty, the display string is
#   rebuilt from it: "name (email)" when both are non-empty, otherwise
#   whichever one is available.
# * Otherwise the display string is parsed: the first e-mail address found is
#   removed, leftover "()" and enclosing parentheses are stripped, and the
#   remainder becomes the name. Nothing is changed when the string is missing,
#   empty, or contains no e-mail address.
def _sync_author_detail(key='author')
  context = getContext()
  detail_key = "#{key}_detail"
  detail = context[detail_key]
  if detail and not detail.empty?
    name = detail['name']
    email = detail['email']
    has_name = name && !name.empty?
    # The original tested name.empty? twice, so an empty e-mail still produced "name ()".
    has_email = email && !email.empty?

    if has_name and has_email
      context[key] = "#{name} (#{email})"
    elsif has_name
      context[key] = name
    elsif has_email
      context[key] = email
    end
  else
    author = context[key].dup unless context[key].nil?
    return if not author or author.empty?
    emailmatch = author.match(/(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))/)
    # No address in the string: nothing to split (previously a NoMethodError on nil).
    return unless emailmatch
    email = emailmatch[1]
    author.gsub!(email, '')
    author.gsub!("\(\)", '')
    author.strip!
    author.gsub!(/^\(/,'')
    author.gsub!(/\)$/,'')
    author.strip!
    context[detail_key] ||= FeedParserDict.new
    context[detail_key]['name'] = author
    context[detail_key]['email'] = email
  end
end
|
2037
|
+
|
2038
|
+
# Opens a <subtitle> element (also <tagline> and <itunes:subtitle>); content
# is collected under 'subtitle' with a default type of text/plain.
def _start_subtitle(attrsD)
  pushContent('subtitle', attrsD, 'text/plain', true)
end
alias :_start_tagline :_start_subtitle
alias :_start_itunes_subtitle :_start_subtitle
|
2043
|
+
|
2044
|
+
# Closes a <subtitle>/<tagline>/<itunes:subtitle> element.
def _end_subtitle
  popContent('subtitle')
end
alias :_end_tagline :_end_subtitle
alias :_end_itunes_subtitle :_end_subtitle
|
2049
|
+
|
2050
|
+
# Opens a <rights> element (also <dc:rights> and <copyright>); content is
# collected under 'rights' with a default type of text/plain.
def _start_rights(attrsD)
  pushContent('rights', attrsD, 'text/plain', true)
end
alias :_start_dc_rights :_start_rights
alias :_start_copyright :_start_rights
|
2055
|
+
|
2056
|
+
# Closes a <rights>/<dc:rights>/<copyright> element.
def _end_rights
  popContent('rights')
end
alias :_end_dc_rights :_end_rights
alias :_end_copyright :_end_rights
|
2061
|
+
|
2062
|
+
# Opens an <item> (also <entry> and <product>): appends a new entry
# dictionary, enters entry mode, resets the guid-is-link flag, uses a
# non-empty rdf:about attribute as the entry id, and finally hands the
# attributes to _cdf_common.
def _start_item(attrsD)
  @entries << FeedParserDict.new
  push('item', false)
  @inentry = true
  @guidislink = false
  about = getAttribute(attrsD, 'rdf:about')
  getContext()['id'] = about if about && !about.empty?
  _cdf_common(attrsD)
end
alias :_start_entry :_start_item
alias :_start_product :_start_item
|
2076
|
+
|
2077
|
+
# Closes an <item>/<entry> element and leaves entry mode.
def _end_item
  pop('item')
  @inentry = false
end
alias :_end_entry :_end_item
|
2082
|
+
|
2083
|
+
# Opens a <dc:language> (also <language>) element; text is collected under
# 'language'.
def _start_dc_language(attrsD)
  push('language', true)
end
alias :_start_language :_start_dc_language
|
2087
|
+
|
2088
|
+
# Closes a <dc:language>/<language> element and remembers its value in @lang.
def _end_dc_language
  @lang = pop('language')
end
alias :_end_language :_end_dc_language
|
2092
|
+
|
2093
|
+
# Opens a <dc:publisher> (also <webMaster>) element; text is collected under
# 'publisher'.
def _start_dc_publisher(attrsD)
  push('publisher', true)
end
alias :_start_webmaster :_start_dc_publisher
|
2097
|
+
|
2098
|
+
# Closes a <dc:publisher>/<webMaster> element and re-synchronises the
# 'publisher' / 'publisher_detail' values of the current context.
def _end_dc_publisher
  pop('publisher')
  _sync_author_detail('publisher')
end
alias :_end_webmaster :_end_dc_publisher
|
2103
|
+
|
2104
|
+
# Opens a <published> element (also <dcterms:issued> and <issued>); text is
# collected under 'published'.
def _start_published(attrsD)
  push('published', true)
end
alias :_start_dcterms_issued :_start_published
alias :_start_issued :_start_published
|
2109
|
+
|
2110
|
+
# Closes a <published>/<issued> element and saves the parsed date as
# 'published_parsed'.
def _end_published
  raw_date = pop('published')
  parsed = parse_date(raw_date)
  _save('published_parsed', parsed)
end
alias :_end_dcterms_issued :_end_published
alias :_end_issued :_end_published
|
2116
|
+
|
2117
|
+
# Opens an <updated> element (also <modified>, <dcterms:modified>, <pubDate>
# and <dc:date>); text is collected under 'updated'.
def _start_updated(attrsD)
  push('updated', true)
end
alias :_start_modified :_start_updated
alias :_start_dcterms_modified :_start_updated
alias :_start_pubdate :_start_updated
alias :_start_dc_date :_start_updated
|
2124
|
+
|
2125
|
+
# Closes an <updated>/<modified>/<pubDate>/<dc:date> element and saves the
# parsed date as 'updated_parsed'.
def _end_updated
  raw_date = pop('updated')
  parsed = parse_date(raw_date)
  _save('updated_parsed', parsed)
end
alias :_end_modified :_end_updated
alias :_end_dcterms_modified :_end_updated
alias :_end_pubdate :_end_updated
alias :_end_dc_date :_end_updated
|
2133
|
+
|
2134
|
+
# Opens a <created> (also <dcterms:created>) element; text is collected under
# 'created'.
def _start_created(attrsD)
  push('created', true)
end
alias :_start_dcterms_created :_start_created
|
2138
|
+
|
2139
|
+
# Closes a <created>/<dcterms:created> element and saves the parsed date as
# 'created_parsed'.
def _end_created
  raw_date = pop('created')
  parsed = parse_date(raw_date)
  _save('created_parsed', parsed)
end
alias :_end_dcterms_created :_end_created
|
2144
|
+
|
2145
|
+
# Opens an <expirationDate> element; text is collected under 'expired'.
def _start_expirationdate(attrsD)
  push('expired', true)
end
|
2148
|
+
# Closes an <expirationDate> element and saves the parsed date as
# 'expired_parsed'.
def _end_expirationdate
  raw_date = pop('expired')
  parsed = parse_date(raw_date)
  _save('expired_parsed', parsed)
end
|
2151
|
+
|
2152
|
+
# Handles <cc:license rdf:resource="...">. The license URL lives in the
# rdf:resource attribute, so the element is pushed, the attribute value is
# appended to the pieces collected for the element on top of the stack, and
# the element is popped again straight away.
def _start_cc_license(attrsD)
  push('license', true)
  value = getAttribute(attrsD, 'rdf:resource')
  if value and not value.empty?
    # @elementstack is used directly, as in _start_admin_errorreportsto.
    @elementstack[-1][2] << value
  end
  # Pop unconditionally, as _start_admin_generatoragent and
  # _start_admin_errorreportsto do: popping only when the attribute was
  # present left 'license' on the element stack otherwise.
  pop('license')
end
|
2160
|
+
|
2161
|
+
# Opens a <creativeCommons:license> element; text is collected under 'license'.
def _start_creativecommons_license(attrsD)
  push('license', true)
end
|
2164
|
+
|
2165
|
+
# Closes a <creativeCommons:license> element.
def _end_creativecommons_license
  pop('license')
end
|
2168
|
+
|
2169
|
+
# Adds a tag (term / scheme / label) to the current context's 'tags' list.
# The list is created if missing. Nothing is added when all three parts are
# nil or empty, or when an equal tag is already in the list.
def addTag(term, scheme, label)
  context = getContext()
  context['tags'] ||= []
  tags = context['tags']
  blank = lambda { |part| part.nil? || part.empty? }
  return if blank.call(term) && blank.call(scheme) && blank.call(label)

  tag = FeedParserDict.new({'term' => term, 'scheme' => scheme, 'label' => label})
  context['tags'] << tag unless tags.include?(tag)
end
|
2181
|
+
|
2182
|
+
# Opens a <category> element (also <dc:subject> and <keywords>). A tag is
# recorded from the term / scheme (or domain) / label attributes, then the
# element text is collected under 'category' for _end_category.
def _start_category(attrsD)
  $stderr << "entering _start_category with #{attrsD}\n" if $debug

  scheme = attrsD['scheme'] || attrsD['domain']
  addTag(attrsD['term'], scheme, attrsD['label'])
  push('category', true)
end
alias :_start_dc_subject :_start_category
alias :_start_keywords :_start_category
|
2193
|
+
|
2194
|
+
# Closes an <itunes:keywords> element: the text is split on whitespace and
# each word is added as a tag in the iTunes scheme.
def _end_itunes_keywords
  keywords = pop('itunes_keywords').split
  keywords.each { |term| addTag(term, 'http://www.itunes.com/', nil) }
end
|
2199
|
+
|
2200
|
+
# Opens an <itunes:category> element: its 'text' attribute becomes a tag in
# the iTunes scheme, and the element is pushed as 'category'.
def _start_itunes_category(attrsD)
  term = attrsD['text']
  addTag(term, 'http://www.itunes.com/', nil)
  push('category', true)
end
|
2204
|
+
|
2205
|
+
# Closes a <category> element (also <dc:subject>, <keywords> and
# <itunes:category>). Non-empty element text becomes the term of the most
# recent tag when that tag has no term yet; otherwise it is added as a new
# tag of its own.
def _end_category
  value = pop('category')
  return if value.nil? or value.empty?
  tags = getContext()['tags']
  # The Ruby 1.8-only "if cond:" form was removed here (syntax error on 1.9+);
  # a missing tags list now falls through to addTag, which creates it.
  if tags and not tags.empty? and not tags[-1]['term']
    tags[-1]['term'] = value
  else
    addTag(value, nil, nil)
  end
end
alias :_end_dc_subject :_end_category
alias :_end_keywords :_end_category
alias :_end_itunes_category :_end_category
|
2219
|
+
|
2220
|
+
# Handles a <cloud> element: its attributes are stored as the 'cloud'
# dictionary of the current context.
def _start_cloud(attrsD)
  cloud = FeedParserDict.new(attrsD)
  getContext()['cloud'] = cloud
end
|
2223
|
+
|
2224
|
+
# Opens a <link> element (also <productURL>).
#
# rel defaults to 'alternate' and type to 'text/html'; an href, if present, is
# resolved with resolveURI. The attributes are appended to the context's
# 'links' list, and rel="enclosure" links are also passed to _start_enclosure.
# A link carrying an href is complete as it stands: when it is an alternate
# link of an HTML content type it becomes the context's primary 'link'.
# A link without href is pushed so that its text can be read in _end_link.
def _start_link(attrsD)
  attrsD['rel'] ||= 'alternate'
  attrsD['type'] ||= 'text/html'
  attrsD = itsAnHrefDamnIt(attrsD)
  attrsD['href'] = resolveURI(attrsD['href']) if attrsD.has_key?('href')
  expectingText = @infeed || @inentry || @insource
  context = getContext()
  context['links'] ||= []
  context['links'] << FeedParserDict.new(attrsD)
  _start_enclosure(attrsD) if attrsD['rel'] == 'enclosure'
  if attrsD.has_key?('href')
    expectingText = false
    is_html_alternate = (attrsD['rel'] == 'alternate') && @html_types.include?(mapContentType(attrsD['type']))
    context['link'] = attrsD['href'] if is_html_alternate
  else
    push('link', expectingText)
  end
end
alias :_start_producturl :_start_link
|
2248
|
+
|
2249
|
+
# Closes a <link> element (also <productURL>). Inside a textinput and/or an
# image the collected value is additionally stored as that construct's 'link'.
def _end_link
  value = pop('link')
  context = getContext()
  context['textinput']['link'] = value if @intextinput
  context['image']['link'] = value if @inimage
end
alias :_end_producturl :_end_link
|
2260
|
+
|
2261
|
+
# Opens a <guid> element. The guid is treated as a permalink unless the
# 'ispermalink' attribute is present with a value other than 'true'; the text
# is collected under 'id'.
def _start_guid(attrsD)
  permalink = attrsD['ispermalink'] || 'true'
  @guidislink = (permalink == 'true')
  push('id', true)
end
|
2265
|
+
|
2266
|
+
# Closes a <guid> element. Records 'guidislink' (true only when the guid is a
# permalink and the context has no 'link' key yet) and, for permalink guids,
# passes the guid value to _save as 'link'.
def _end_guid
  value = pop('id')
  _save('guidislink', (@guidislink and not getContext().has_key?('link')))
  # Ruby 1.8-only "if cond:" replaced; it is a syntax error on 1.9 and later.
  if @guidislink
    # guid acts as link, but only if 'ispermalink' is not present or is 'true',
    # and only if the item doesn't already have a link element
    _save('link', value)
  end
end
|
2275
|
+
|
2276
|
+
|
2277
|
+
# Opens a <title> element (also <dc:title> and <media:title>). Content is
# collected as text/plain; the last argument to pushContent is true only
# inside a feed, entry or source.
def _start_title(attrsD)
  in_known_parent = @infeed || @inentry || @insource
  pushContent('title', attrsD, 'text/plain', in_known_parent)
end
alias :_start_dc_title :_start_title
alias :_start_media_title :_start_title
|
2282
|
+
|
2283
|
+
# Closes a <title> element (also <dc:title> and <media:title>). Inside a
# textinput the value is also stored as the textinput's title; failing that,
# inside an image it is stored as the image's title.
def _end_title
  value = popContent('title')
  context = getContext()
  if @intextinput
    context['textinput']['title'] = value
  elsif @inimage
    context['image']['title'] = value
  end
end
alias :_end_dc_title :_end_title
alias :_end_media_title :_end_title
|
2294
|
+
|
2295
|
+
# Opens a <description> element. When the context already has a 'summary',
# the description is routed to 'content' (remembered in @summaryKey);
# otherwise it is collected under 'description' as text/html.
def _start_description(attrsD)
  if getContext().has_key?('summary')
    @summaryKey = 'content'
    _start_content(attrsD)
  else
    in_known_parent = @infeed || @inentry || @insource
    pushContent('description', attrsD, 'text/html', in_known_parent)
  end
end
|
2304
|
+
|
2305
|
+
# Opens an <abstract> element; content is collected under 'description' as
# text/plain.
def _start_abstract(attrsD)
  in_known_parent = @infeed || @inentry || @insource
  pushContent('description', attrsD, 'text/plain', in_known_parent)
end
|
2308
|
+
|
2309
|
+
# Closes a <description> (also <abstract>) element. If the description was
# redirected to 'content' by _start_description, _end_content finishes it;
# otherwise the value is popped and, inside a textinput or image, also stored
# as that construct's description. @summaryKey is always reset.
def _end_description
  if @summaryKey == 'content'
    _end_content()
  else
    value = popContent('description')
    context = getContext()
    if @intextinput
      context['textinput']['description'] = value
    elsif @inimage # Ruby 1.8-only trailing ':' removed (syntax error on 1.9+)
      context['image']['description'] = value
    end
  end
  @summaryKey = nil
end
alias :_end_abstract :_end_description
|
2324
|
+
|
2325
|
+
# Opens an <info> element (also <feedburner:browserFriendly>); content is
# collected under 'info' as text/plain.
def _start_info(attrsD)
  pushContent('info', attrsD, 'text/plain', true)
end
alias :_start_feedburner_browserfriendly :_start_info
|
2329
|
+
|
2330
|
+
# Closes an <info>/<feedburner:browserFriendly> element.
def _end_info
  popContent('info')
end
alias :_end_feedburner_browserfriendly :_end_info
|
2334
|
+
|
2335
|
+
# Opens a <generator> element. Non-empty attributes are normalised with
# itsAnHrefDamnIt and any href is resolved; the attributes are then stored as
# 'generator_detail' and the element text is collected under 'generator'.
def _start_generator(attrsD)
  if attrsD && !attrsD.empty?
    attrsD = itsAnHrefDamnIt(attrsD)
    attrsD['href'] = resolveURI(attrsD['href']) if attrsD.has_key?('href')
  end
  getContext()['generator_detail'] = FeedParserDict.new(attrsD)
  push('generator', true)
end
|
2345
|
+
|
2346
|
+
# Closes a <generator> element; the text becomes the 'name' of the context's
# 'generator_detail' when that dictionary exists.
def _end_generator
  generator_name = pop('generator')
  context = getContext()
  context['generator_detail']['name'] = generator_name if context.has_key?('generator_detail')
end
|
2353
|
+
|
2354
|
+
# Handles <admin:generatorAgent rdf:resource="...">. The URL is carried by
# the rdf:resource attribute: it is appended to the pieces of the freshly
# pushed 'generator' element, the element is popped at once, and the URL is
# stored as the href of 'generator_detail'.
def _start_admin_generatoragent(attrsD)
  push('generator', true)
  value = getAttribute(attrsD, 'rdf:resource')
  if value and not value.empty?
    # @elementstack is used directly, as in _start_admin_errorreportsto.
    @elementstack[-1][2] << value
  end
  pop('generator')
  getContext()['generator_detail'] = FeedParserDict.new({'href' => value})
end
|
2363
|
+
|
2364
|
+
# Handles <admin:errorReportsTo rdf:resource="...">: a non-empty rdf:resource
# value is appended to the pieces of the freshly pushed 'errorreportsto'
# element, which is then popped immediately.
def _start_admin_errorreportsto(attrsD)
  push('errorreportsto', true)
  resource = getAttribute(attrsD, 'rdf:resource')
  @elementstack[-1][2] << resource if resource && !resource.empty?
  pop('errorreportsto')
end
|
2372
|
+
|
2373
|
+
# Opens a <summary> (also <itunes:summary>) element. When the context already
# has a 'summary', this one is routed to 'content'; otherwise it is collected
# under 'summary' as text/plain. @summaryKey records which path was taken.
def _start_summary(attrsD)
  if getContext().has_key?('summary')
    @summaryKey = 'content'
    _start_content(attrsD)
  else
    @summaryKey = 'summary'
    pushContent(@summaryKey, attrsD, 'text/plain', true)
  end
end
alias :_start_itunes_summary :_start_summary
|
2384
|
+
|
2385
|
+
# Closes a <summary> (also <itunes:summary>) element, finishing whichever
# path _start_summary chose, and resets @summaryKey.
def _end_summary
  # Ruby 1.8-only "if cond:" replaced; it is a syntax error on 1.9 and later.
  if @summaryKey == 'content'
    _end_content()
  else
    popContent(@summaryKey || 'summary')
  end
  @summaryKey = nil
end
alias :_end_itunes_summary :_end_summary
|
2394
|
+
|
2395
|
+
# Handles an <enclosure>: the normalised attributes are appended to the
# context's 'enclosures' list, and a non-empty href is used as the context's
# 'id' when no id has been set yet.
def _start_enclosure(attrsD)
  attrsD = itsAnHrefDamnIt(attrsD)
  context = getContext()
  context['enclosures'] ||= []
  context['enclosures'] << FeedParserDict.new(attrsD)
  href = attrsD['href']
  context['id'] = href if href && !href.empty? && !context['id']
end
|
2407
|
+
|
2408
|
+
# Opens a <source> element: from here on getContext returns @sourcedata.
def _start_source(attrsD)
  @insource = true
end
|
2411
|
+
|
2412
|
+
# Closes a <source> element: leaves source mode, stores a deep copy (Marshal
# round trip) of the collected source data as 'source' on the now-current
# context, and clears the collector.
def _end_source
  @insource = false
  snapshot = Marshal.load(Marshal.dump(@sourcedata))
  getContext()['source'] = snapshot
  @sourcedata.clear()
end
|
2417
|
+
|
2418
|
+
# Opens a <content> element: content is collected under 'content' with a
# default type of text/plain, a non-empty 'src' attribute is recorded in the
# content parameters, and 'content' is pushed onto the element stack.
def _start_content(attrsD)
  pushContent('content', attrsD, 'text/plain', true)
  src = attrsD['src']
  # Ruby 1.8-only "if cond:" replaced; it is a syntax error on 1.9 and later.
  if src and not src.empty?
    @contentparams['src'] = src
  end
  push('content', true)
end
|
2426
|
+
|
2427
|
+
# Opens a <prodlink> element; content is collected under 'content' as text/html.
def _start_prodlink(attrsD)
  pushContent('content', attrsD, 'text/html', true)
end
|
2430
|
+
|
2431
|
+
# Opens a <body> (also <xhtml:body>) element; content is collected under
# 'content' as application/xhtml+xml.
def _start_body(attrsD)
  pushContent('content', attrsD, 'application/xhtml+xml', true)
end
alias :_start_xhtml_body :_start_body
|
2435
|
+
|
2436
|
+
# Opens a <content:encoded> (also <fullitem>) element; content is collected
# under 'content' as text/html.
def _start_content_encoded(attrsD)
  pushContent('content', attrsD, 'text/html', true)
end
alias :_start_fullitem :_start_content_encoded
|
2440
|
+
|
2441
|
+
# Closes a content element (<content>, <body>, <xhtml:body>,
# <content:encoded>, <fullitem>, <prodlink>). When the content type is
# text/plain or one of @html_types, the value is also saved as 'description'.
def _end_content
  # The type must be read before popContent is called.
  # NOTE(review): presumably popContent resets @contentparams -- confirm.
  copyToDescription = (['text/plain'] + @html_types).include? mapContentType(@contentparams['type'])
  value = popContent('content')
  if copyToDescription
    _save('description', value)
  end
end
# These aliases used to sit inside the method body, so they only came into
# existence after _end_content had been called once.
alias :_end_body :_end_content
alias :_end_xhtml_body :_end_content
alias :_end_content_encoded :_end_content
alias :_end_fullitem :_end_content
alias :_end_prodlink :_end_content
|
2453
|
+
|
2454
|
+
# Opens an <itunes:image> (also <itunes:link>) element: pushes 'itunes_image'
# and stores the href attribute as the context's image.
def _start_itunes_image(attrsD)
  push('itunes_image', false)
  image = FeedParserDict.new({'href' => attrsD['href']})
  getContext()['image'] = image
end
alias :_start_itunes_link :_start_itunes_image
|
2459
|
+
|
2460
|
+
# Closes an <itunes:block> element and stores a boolean: true only when the
# text is exactly 'yes'. (The original "... and true or false" tail bound
# looser than the assignment, so the plain comparison result was what got
# stored; this form states that directly.)
def _end_itunes_block
  value = pop('itunes_block', false)
  getContext()['itunes_block'] = (value == 'yes')
end
|
2464
|
+
|
2465
|
+
# Closes an <itunes:explicit> element and stores a boolean: true only when
# the text is exactly 'yes'. (The original "... and true or false" tail bound
# looser than the assignment, so the plain comparison result was what got
# stored; this form states that directly.)
def _end_itunes_explicit
  value = pop('itunes_explicit', false)
  getContext()['itunes_explicit'] = (value == 'yes')
end
|
2469
|
+
|
2470
|
+
|
2471
|
+
# ISO-8601 date parsing routines written by Fazal Majid.
|
2472
|
+
# The ISO 8601 standard is very convoluted and irregular - a full ISO 8601
|
2473
|
+
# parser is beyond the scope of feedparser and the current Time.iso8601
|
2474
|
+
# method does not work.
|
2475
|
+
# A single regular expression cannot parse ISO 8601 date formats into groups
|
2476
|
+
# as the standard is highly irregular (for instance is 030104 2003-01-04 or
|
2477
|
+
# 0301-04-01), so we use templates instead.
|
2478
|
+
# Please note the order in templates is significant because we need a
|
2479
|
+
# greedy match.
|
2480
|
+
# Parses a variety of ISO-8601-compatible formats such as 20040105 or
# 2004-01-05T10:20:30+09:00 and returns the instant as a UTC Time.
#
# Returns nil when nothing (or only an empty prefix) matches. Fields that are
# absent fall back as follows: a missing year or month uses the current UTC
# year/month, a missing day is 1 when a year, month or century was given and
# today's day otherwise, and a missing time of day is 00:00:00. Two-digit
# years are placed in the current century.
def _parse_date_iso8601(dateString)
  # Each date template is paired with the names of its capture groups. Order
  # matters: the first template that matches wins, so longer forms come first.
  # NOTE(review): templates without '^' can match anywhere in the string, as
  # they could before this rewrite.
  templates = [
    ['^(\d{4})-?([01]\d)-?([0123]\d)', ['year', 'month', 'day']], # second hyphen optional so 20040105 is a calendar date
    ['^(\d{4})-([01]\d)',              ['year', 'month']],
    ['^(\d{4})-?([0123]\d\d)',         ['year', 'ordinal']],
    ['^(\d\d)-?([01]\d)-?([0123]\d)',  ['year', 'month', 'day']],
    ['^(\d\d)-?([0123]\d\d)',          ['year', 'ordinal']],
    ['^(\d{4})',                       ['year']],
    ['-(\d\d)-?([01]\d)',              ['year', 'month']],
    ['-([0123]\d\d)',                  ['ordinal']],
    ['-(\d\d)',                        ['year']],
    ['--([01]\d)-?([0123]\d)',         ['month', 'day']],
    ['--([01]\d)',                     ['month']],
    ['---([0123]\d)',                  ['day']],
    ['(\d\d$)',                        ['century']],
    ['',                               []]
  ]
  # Optional time-of-day suffix. The outer group is non-capturing so that the
  # six captures line up one-to-one with time_fields.
  time_suffix = '(?:T?(\d\d):(\d\d)(?::(\d\d))?([+-](\d\d)(?::(\d\d))?|Z)?)?'
  time_fields = ['hour', 'minute', 'second', 'tz', 'tzhour', 'tzmin']

  m = nil
  param_keys = []
  templates.each do |pattern, fields|
    $stderr << "Trying iso8601 regexp: #{pattern + time_suffix}\n" if $debug
    param_keys = fields + time_fields
    m = dateString.match(Regexp.new(pattern + time_suffix))
    break if m
  end
  # The empty template always matches; an empty match at offset 0 means "no date".
  return if m.nil? or (m.begin(0).zero? and m.end(0).zero?)

  params = {}
  param_keys.zip(m.captures) { |key, captured| params[key] = captured }

  ordinal = params['ordinal'].to_i unless params['ordinal'].nil?

  year = params['year'] || '--'
  if year.nil? or year.empty? or year == '--'
    year = Time.now.utc.year
  elsif year.length == 2
    # ISO 8601 assumes current century, i.e. 93 -> 2093, NOT 1993
    year = 100 * (Time.now.utc.year / 100) + year.to_i
  else
    year = year.to_i
  end

  month = params['month'] || '-'
  if month.nil? or month.empty? or month == '-'
    # An ordinal (day-of-year) date is converted to a calendar month/day.
    if ordinal
      month = DateTime.ordinal(year, ordinal).month
    else
      month = Time.now.utc.month
    end
  end
  month = month.to_i unless month.nil?

  day = params['day']
  if day.nil? or day.empty?
    if ordinal
      day = DateTime.ordinal(year, ordinal).day
    elsif params['century'] or params['year'] or params['month']
      day = 1
    else
      day = Time.now.utc.day
    end
  else
    day = day.to_i
  end

  # A bare century is mapped to the first year of that century (CC=20 -> 1901).
  if params.has_key? 'century'
    year = (params['century'].to_i - 1) * 100 + 1
  end

  # In ISO 8601 most fields are optional; nil.to_i yields 0.
  hour = params['hour'].to_i
  minute = params['minute'].to_i
  second = params['second'].to_i
  utc = Time.utc(year, month, day, hour, minute, second)

  tz = params['tz']
  if tz and not tz.empty? and tz != 'Z'
    # Shift by the zone offset in seconds. Time arithmetic carries across
    # day, month and year boundaries. (The offset used to be added to the
    # day and month slots of the time array.)
    offset = params['tzhour'].to_i * 3600 + params['tzmin'].to_i * 60
    case tz[0, 1]
    when '-' then utc += offset # zone is behind UTC
    when '+' then utc -= offset # zone is ahead of UTC
    else return nil
    end
  end
  return utc
end
|
2604
|
+
|
2605
|
+
# Parses a date in the OnBlog 8-bit format (Korean year/month/day markers
# followed by hh:mm:ss). The fields are rewritten as a W3DTF string with a
# +09:00 offset and handed to _parse_date_w3dtf. Returns nil when the string
# does not match.
# 8-bit date handling routines written by ytrewq1.
def _parse_date_onblog(dateString)
  korean_year  = u("년") # b3e2 in euc-kr
  korean_month = u("월") # bff9 in euc-kr
  korean_day   = u("일") # c0cf in euc-kr

  korean_onblog_date_re = /(\d{4})#{korean_year}\s+(\d{2})#{korean_month}\s+(\d{2})#{korean_day}\s+(\d{2}):(\d{2}):(\d{2})/

  m = korean_onblog_date_re.match(dateString)
  return unless m
  year, month, day, hour, minute, second = m[1], m[2], m[3], m[4], m[5], m[6]
  w3dtfdate = "#{year}-#{month}-#{day}T#{hour}:#{minute}:#{second}+09:00"

  $stderr << "OnBlog date parsed as: %s\n" % w3dtfdate if $debug
  return _parse_date_w3dtf(w3dtfdate)
end
|
2623
|
+
|
2624
|
+
# Parses a date in the Nate 8-bit format (yyyy-mm-dd, a Korean AM/PM marker,
# then h:m:s). Twelve is added to the hour for PM, the hour is zero-padded to
# two digits, and the result is handed to _parse_date_w3dtf with a +09:00
# offset. Returns nil when the string does not match.
# 8-bit date handling routines written by ytrewq1.
def _parse_date_nate(dateString)
  korean_am = u("오전") # bfc0 c0fc in euc-kr
  korean_pm = u("오후") # bfc0 c8c4 in euc-kr

  korean_nate_date_re = /(\d{4})-(\d{2})-(\d{2})\s+(#{korean_am}|#{korean_pm})\s+(\d{0,2}):(\d{0,2}):(\d{0,2})/
  m = korean_nate_date_re.match(dateString)
  return unless m

  hour = m[5].to_i
  hour += 12 if m[4] == korean_pm
  hour = hour.to_s.rjust(2,'0')
  w3dtfdate = "#{m[1]}-#{m[2]}-#{m[3]}T#{hour}:#{m[6]}:#{m[7]}+09:00"
  $stderr << "Nate date parsed as: %s\n" % w3dtfdate if $debug
  return _parse_date_w3dtf(w3dtfdate)
end
|
2643
|
+
|
2644
|
+
# Parses an MS SQL style date ("yyyy-mm-dd hh:mm:ss" with optional fractional
# seconds, which are ignored). The fields are rewritten as a W3DTF string and
# handed to _parse_date_w3dtf. Returns nil when the string does not match.
# NOTE(review): the zone is hard-coded to +09:00, the same as the Korean
# parsers -- confirm that this is intended.
def _parse_date_mssql(dateString)
  mssql_date_re = /(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})(\.\d+)?/

  m = mssql_date_re.match(dateString)
  return unless m
  year, month, day, hour, minute, second = m[1], m[2], m[3], m[4], m[5], m[6]
  w3dtfdate = "#{year}-#{month}-#{day}T#{hour}:#{minute}:#{second}+09:00"
  $stderr << "MS SQL date parsed as: %s\n" % w3dtfdate if $debug
  return _parse_date_w3dtf(w3dtfdate)
end
|
2653
|
+
|
2654
|
+
# Parses a date written in a Greek 8-bit format: an RFC 822 layout whose
# weekday and month abbreviations are Greek. Both are translated to English
# and the result is handed to _parse_date_rfc822. Returns nil when the layout
# does not match or when the weekday or month is not in the tables below.
def _parse_date_greek(dateString)
  # Unicode strings for Greek date strings
  greek_months = {
    u("Ιαν") => u("Jan"),  # c9e1ed in iso-8859-7
    u("Φεβ") => u("Feb"),  # d6e5e2 in iso-8859-7
    u("Μάώ") => u("Mar"),  # ccdcfe in iso-8859-7
    u("Μαώ") => u("Mar"),  # cce1fe in iso-8859-7
    u("Απρ") => u("Apr"),  # c1f0f1 in iso-8859-7
    u("Μάι") => u("May"),  # ccdce9 in iso-8859-7
    u("Μαϊ") => u("May"),  # cce1fa in iso-8859-7
    u("Μαι") => u("May"),  # cce1e9 in iso-8859-7
    u("Ιούν") => u("Jun"), # c9effded in iso-8859-7
    u("Ιον") => u("Jun"),  # c9efed in iso-8859-7
    u("Ιούλ") => u("Jul"), # c9effdeb in iso-8859-7
    u("Ιολ") => u("Jul"),  # c9f9eb in iso-8859-7
    u("Αύγ") => u("Aug"),  # c1fde3 in iso-8859-7
    u("Αυγ") => u("Aug"),  # c1f5e3 in iso-8859-7
    u("Σεπ") => u("Sep"),  # d3e5f0 in iso-8859-7
    u("Οκτ") => u("Oct"),  # cfeaf4 in iso-8859-7
    u("Νοέ") => u("Nov"),  # cdefdd in iso-8859-7
    u("Νοε") => u("Nov"),  # cdefe5 in iso-8859-7
    u("Δεκ") => u("Dec"),  # c4e5ea in iso-8859-7
  }

  greek_wdays = {
    u("Κυρ") => u("Sun"), # caf5f1 in iso-8859-7
    u("Δευ") => u("Mon"), # c4e5f5 in iso-8859-7
    u("Τρι") => u("Tue"), # d4f1e9 in iso-8859-7
    u("Τετ") => u("Wed"), # d4e5f4 in iso-8859-7
    u("Πεμ") => u("Thu"), # d0e5ec in iso-8859-7
    u("Παρ") => u("Fri"), # d0e1f1 in iso-8859-7
    u("Σαβ") => u("Sat"), # d3e1e2 in iso-8859-7
  }

  greek_date_format = /([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)/

  m = greek_date_format.match(dateString)
  return unless m
  wday = greek_wdays[m[1]]
  month = greek_months[m[3]]
  # Hash#[] returns nil for an unknown key instead of raising, so the former
  # begin/rescue never fired and a malformed RFC 822 string was built.
  return nil unless wday and month
  rfc822date = "#{wday}, #{m[2]} #{month} #{m[4]} #{m[5]}:#{m[6]}:#{m[7]} #{m[8]}"
  $stderr << "Greek date parsed as: #{rfc822date}\n" if $debug
  return _parse_date_rfc822(rfc822date)
end
|
2703
|
+
|
2704
|
+
# Parses a date in a Hungarian 8-bit format: yyyy-<month name>-ddThh:mm+zz:zz
# with the month spelled out in Hungarian. The month is translated to its
# number, day and hour are zero-padded, seconds are set to 00, and the result
# is handed to _parse_date_w3dtf. Returns nil when the layout does not match
# or the month name is not in the table below.
def _parse_date_hungarian(dateString)
  hungarian_date_format_re = /(\d{4})-([^-]+)-(\d{0,2})T(\d{0,2}):(\d{2})((\+|-)(\d{0,2}:\d{2}))/
  m = hungarian_date_format_re.match(dateString)
  return unless m

  # Unicode strings for Hungarian date strings
  hungarian_months = {
    u("január") =>   u("01"),  # e1 in iso-8859-2
    u("februári") => u("02"),  # e1 in iso-8859-2
    u("március") =>  u("03"),  # e1 in iso-8859-2
    u("április") =>  u("04"),  # e1 in iso-8859-2
    u("máujus") =>   u("05"),  # e1 in iso-8859-2
    u("június") =>   u("06"),  # fa in iso-8859-2
    u("július") =>   u("07"),  # fa in iso-8859-2
    u("augusztus") =>     u("08"),
    u("szeptember") =>    u("09"),
    u("október") =>  u("10"),  # f3 in iso-8859-2
    u("november") =>      u("11"),
    u("december") =>      u("12"),
  }
  month = hungarian_months[m[2]]
  # Hash#[] returns nil for an unknown month instead of raising, so the former
  # begin/rescue never fired; the resulting "yyyy--dd..." string was then read
  # by the W3DTF parser as 1 January of that year.
  return unless month
  day = m[3].rjust(2,'0')
  hour = m[4].rjust(2,'0')

  w3dtfdate = "#{m[1]}-#{month}-#{day}T#{hour}:#{m[5]}:00#{m[6]}"
  $stderr << "Hungarian date parsed as: #{w3dtfdate}\n" if $debug
  return _parse_date_w3dtf(w3dtfdate)
end
|
2737
|
+
|
2738
|
+
# Splits num into [remainder, carry] with respect to modulus,
# e.g. rollover(61, 60) => [1, 1].
def rollover(num, modulus)
  remainder = num % modulus
  carry = num / modulus
  [remainder, carry]
end
|
2741
|
+
|
2742
|
+
# Returns num / modulus, unless that quotient is zero, in which case num
# itself is returned.
def set_self(num, modulus)
  quotient = num / modulus
  quotient == 0 ? num : quotient
end
|
2749
|
+
# W3DTF-style date parsing
|
2750
|
+
# FIXME shouldn't it be "W3CDTF"?
|
2751
|
+
# Parses a W3DTF-style date (yyyy[-mm[-dd[Thh:mm:ss(Z|+hh:mm|-hh:mm)]]]) and
# returns it as a UTC Time, or nil when the string does not start with a
# four-digit year.
#
# Out-of-range seconds, minutes, hours and days are rolled over into the next
# larger unit rather than rejected (e.g. 61 seconds becomes 1 minute and
# 1 second). A missing month or day defaults to 1.
# NOTE(review): Time.days_in_month is not part of core Ruby; it is presumably
# provided elsewhere (e.g. ActiveSupport) -- confirm.
def _parse_date_w3dtf(dateString)
  m = dateString.match(/^(\d{4})-?(?:(?:([01]\d)-?(?:([0123]\d)(?:T(\d\d):(\d\d):(\d\d)([+-]\d\d:\d\d|Z))?)?)?)?/)
  # No leading four-digit year: not a W3DTF date (used to raise NoMethodError on nil).
  return unless m

  # Year, month and day as integers; absent (nil) or zero values become 1.
  w3 = m[1..3].map { |s| n = s.to_i; n == 0 ? 1 : n }
  # Hour, minute and second as integers (absent values become 0).
  w3 += m[4..6].map { |s| s.to_i }
  # The timezone stays a String (or nil).
  w3 << m[-1]

  # Roll overflow upwards: seconds -> minutes -> hours -> days.
  w3[5], carry = rollover(w3[5], 60)
  w3[4] += carry
  w3[4], carry = rollover(w3[4], 60)
  w3[3] += carry
  w3[3], carry = rollover(w3[3], 24)
  w3[2] = w3[2] + carry

  if w3[1] > 12
    w3[1], carry = rollover(w3[1], 12)
    w3[1] = 12 if w3[1] == 0
    w3[0] += carry
  end

  # Roll day overflow into following months (and years).
  num_days = Time.days_in_month(w3[1], w3[0])
  while w3[2] > num_days
    w3[2] -= num_days
    w3[1] += 1
    if w3[1] > 12
      w3[0] += 1
      w3[1] = set_self(w3[1], 12)
    end
    num_days = Time.days_in_month(w3[1], w3[0])
  end

  # The fields are local to the given zone, so converting to UTC means
  # applying the offset with its sign flipped.
  if w3[6].class == String
    if /^-/ =~ w3[6] # Zone offset goes backwards
      w3[6][0] = '+'
    elsif /^\+/ =~ w3[6]
      w3[6][0] = '-'
    end
  end
  return Time.utc(w3[0], w3[1], w3[2], w3[3], w3[4], w3[5]) + Time.zone_offset(w3[6] || "UTC")
end
|
2798
|
+
|
2799
|
+
def _parse_date_rfc822(dateString)
|
2800
|
+
# Parse an RFC822, RFC1123, RFC2822 or asctime-style date
|
2801
|
+
# These first few lines are to fix up the stupid proprietary format from Disney
|
2802
|
+
unknown_timezones = { 'AT' => 'EDT', 'ET' => 'EST',
|
2803
|
+
'CT' => 'CST', 'MT' => 'MST',
|
2804
|
+
'PT' => 'PST'
|
2805
|
+
}
|
2806
|
+
|
2807
|
+
mon = dateString.split[2]
|
2808
|
+
if mon.length > 3 and Time::RFC2822_MONTH_NAME.include?mon[0..2]
|
2809
|
+
dateString.sub!(mon,mon[0..2])
|
2810
|
+
end
|
2811
|
+
if dateString[-3..-1] != "GMT" and unknown_timezones[dateString[-2..-1]]
|
2812
|
+
dateString[-2..-1] = unknown_timezones[dateString[-2..-1]]
|
2813
|
+
end
|
2814
|
+
# Okay, the Disney date format should be fixed up now.
|
2815
|
+
rfc = dateString.match(/([A-Za-z]{3}), ([0123]\d) ([A-Za-z]{3}) (\d{4})( (\d\d):(\d\d)(?::(\d\d))? ([A-Za-z]{3}))?/)
|
2816
|
+
if rfc.to_a.length > 1 and rfc.to_a.include? nil
|
2817
|
+
dow, day, mon, year, hour, min, sec, tz = rfc[1..-1]
|
2818
|
+
hour,min,sec = [hour,min,sec].map{|e| e.to_s.rjust(2,'0') }
|
2819
|
+
tz ||= "GMT"
|
2820
|
+
end
|
2821
|
+
asctime_match = dateString.match(/([A-Za-z]{3}) ([A-Za-z]{3}) (\d?\d) (\d\d):(\d\d):(\d\d) ([A-Za-z]{3}) (\d\d\d\d)/).to_a
|
2822
|
+
if asctime_match.to_a.length > 1
|
2823
|
+
# Month-abbr dayofmonth hour:minute:second year
|
2824
|
+
dow, mon, day, hour, min, sec, tz, year = asctime_match[1..-1]
|
2825
|
+
day.to_s.rjust(2,'0')
|
2826
|
+
end
|
2827
|
+
if (rfc.to_a.length > 1 and rfc.to_a.include? nil) or asctime_match.to_a.length > 1
|
2828
|
+
ds = "#{dow}, #{day} #{mon} #{year} #{hour}:#{min}:#{sec} #{tz}"
|
2829
|
+
else
|
2830
|
+
ds = dateString
|
2831
|
+
end
|
2832
|
+
t = Time.rfc2822(ds).utc
|
2833
|
+
return t
|
2834
|
+
end
|
2835
|
+
|
2836
|
+
# Parse a date in yyyy/mm/dd hh:mm:ss TTT format.
# Note that there is a day of the week at the beginning.
# Ex. Fri, 2006/09/15 08:19:53 EDT
#
# aDateString:: the date text, handed to Time.parse unchanged.
#
# Returns a Time in UTC. Raises ArgumentError when Time.parse rejects it.
def _parse_date_perforce(aDateString) # FIXME not in 4.1?
  Time.parse(aDateString).utc
end
|
2842
|
+
|
2843
|
+
# Flatten a Time into a nine element Array of Integers:
#   [year, month, mday, hour, min, sec, weekday, yday, isdst]
#
# weekday is shifted so that Monday is 0 and Sunday is 6 (Time#wday counts
# from Sunday = 0). isdst is 1 when atime.isdst is true, otherwise 0.
#
# NOTE leave the error handling to parse_date
def extract_tuple(atime)
  fields = [atime.year, atime.month, atime.mday, atime.hour,
            atime.min, atime.sec, (atime.wday - 1) % 7, atime.yday]
  fields.map { |v| v.to_i } << (atime.isdst ? 1 : 0)
end
|
2854
|
+
|
2855
|
+
# Try each handler named in @date_handlers, in order, on dateString.
#
# A handler is a method name; it is invoked with dateString and is expected
# to return a Time (its result is passed to extract_tuple). A handler that
# raises a StandardError is skipped; with $debug set the failure is logged
# to $stderr.
#
# Returns the tuple from the first handler that succeeds, or nil when none
# does.
def parse_date(dateString)
  @date_handlers.each do |handler|
    begin
      $stderr << "Trying date_handler #{handler}\n" if $debug
      return extract_tuple(send(handler, dateString))
    rescue StandardError => e
      # Only ordinary errors mean "try the next handler"; Interrupt,
      # SystemExit and friends must keep propagating.
      $stderr << "#{handler} raised #{e}\n" if $debug
    end
  end
  nil
end
|
2867
|
+
|
2868
|
+
end # End FeedParserMixin
|
2869
|
+
|
2870
|
+
# SAX handler for well-formed feeds: translates the SAX driver's callbacks
# into the FeedParserMixin hooks (unknown_starttag, unknown_endtag,
# handle_data, handle_comment, trackNamespace).
#
# After a parse, +bozo+ is true and +exc+ holds the exception if the driver
# reported an error.
class StrictFeedParser < XML::SAX::HandlerBase # expat
  include FeedParserMixin

  attr_accessor :bozo, :entries, :feeddata, :exc

  def initialize(baseuri, baselang, encoding)
    $stderr << "trying StrictFeedParser\n" if $debug
    startup(baseuri, baselang, encoding)
    @bozo = false
    @exc = nil
    super()
  end

  # Returns [system id, line number] from the locator the driver supplied.
  def getPos
    [@locator.getSystemId, @locator.getLineNumber]
  end

  # Returns the attributes of a SAX AttributeList as [[name, value], ...].
  def getAttrs(attrs)
    # Exclusive range: getLength is a count, so valid indexes stop at length - 1.
    (0...attrs.getLength).map { |i| [attrs.getName(i), attrs.getValue(i)] }
  end

  def setDocumentLocator(loc)
    @locator = loc
  end

  def startDoctypeDecl(name, pub_sys, long_name, uri)
    # Nothing is done here. What could we do that is neat and useful?
  end

  def startNamespaceDecl(prefix, uri)
    trackNamespace(prefix, uri)
  end

  def endNamespaceDecl(prefix)
  end

  # name may arrive as "namespaceuri;localname"; it is reduced to the
  # lower-cased "prefix:localname" form (or just "localname") and forwarded.
  def startElement(name, attrs)
    namespaceuri, localname = split_qualified_name(name)
    if /backend\.userland\.com\/rss/ =~ namespaceuri
      # match any backend.userland.com namespace
      namespaceuri = 'http://backend.userland.com/rss'
    end
    prefix = @matchnamespaces[namespaceuri]
    # No need to raise UndeclaredNamespace, Expat does that for us with
    # "unbound prefix (XMLParserError)"
    localname = prefix + ':' + localname if prefix && !prefix.empty?
    unknown_starttag(localname.downcase, attrs)
  end

  def character(text, start, length)
    #handle_data(CGI.unescapeHTML(text))
    handle_data(text)
  end
  # expat provides "character" not "characters"!
  alias :characters :character # Just in case.

  def startCdata(content)
    handle_data(content)
  end

  # Mirror of startElement: the end tag is reported under the same
  # "prefix:localname" form, and the caller's string is left untouched.
  def endElement(name)
    namespaceuri, localname = split_qualified_name(name)
    prefix = @matchnamespaces[namespaceuri]
    localname = prefix + ':' + localname if prefix && !prefix.empty?
    unknown_endtag(localname.downcase)
  end

  def comment(comment)
    handle_comment(comment)
  end

  def entityDecl(*foo)
  end

  def unparsedEntityDecl(*foo)
  end

  # Record a recoverable parser error; parsing continues.
  def error(exc)
    @bozo = true
    @exc = exc
  end

  # Record the error, then re-raise it so the parse is aborted.
  def fatalError(exc)
    error(exc)
    raise exc
  end

  private

  # Split "namespaceuri;localname" into [lower-cased uri, localname].
  # A name without ';' yields ['', name].
  def split_qualified_name(name)
    m = /^(([^;]*);)?(.+)$/.match(name)
    return ['', name] unless m
    [(m[2] || '').downcase, m[3]]
  end
end
|
2968
|
+
|
2969
|
+
# Forgiving SGML-based parser used when the strict XML parser cannot be.
class LooseFeedParser < BetterSGMLParser
  include FeedParserMixin
  # We write the methods that were in BaseHTMLProcessor in the python code
  # in here directly. We do this because if we inherited from
  # BaseHTMLProcessor but then included from FeedParserMixin, the methods
  # of Mixin would overwrite the methods we inherited from
  # BaseHTMLProcessor. This is exactly the opposite of what we want to
  # happen!

  attr_accessor :encoding, :bozo, :feeddata, :entries, :namespacesInUse

  Elements_No_End_Tag = ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
                         'img', 'input', 'isindex', 'link', 'meta', 'param']
  New_Declname_Re = /[a-zA-Z][-_.a-zA-Z0-9:]*\s*/

  # feed needs to mapped to feeddata, not the SGMLParser method feed. I think.
  # The inherited method stays reachable as sgml_feed.
  alias :sgml_feed :feed
  def feed
    @feeddata
  end
  def feed=(data)
    @feeddata = data
  end

  def initialize(baseuri, baselang, encoding)
    startup(baseuri, baselang, encoding)
    super() # Keep the parentheses! No touchy.
  end

  def reset
    @pieces = []
    super
  end

  # Pre-process +data+ and push it through the SGML parser.
  # The caller's string is not modified.
  #
  # NOTE(review): the entity literals in this class were reconstructed from
  # the Python feedparser 4.1 reference (BaseHTMLProcessor.feed and
  # decodeEntities) -- confirm against the original source.
  def parse(data)
    # Escape "<!" unless it opens a DOCTYPE, a comment or a marked section.
    data = data.gsub(/<!((?!DOCTYPE|--|\[))/i, '&lt;!\1')
    # Expand attribute-less short tags (<foo/> -> <foo></foo>) except for
    # elements that never take an end tag.
    data = data.gsub(/<([^<\s]+?)\s*\/>/) do |tag|
      clean = tag[1..-3].strip
      if Elements_No_End_Tag.include?(clean)
        tag
      else
        '<' + clean + '></' + clean + '>'
      end
    end

    data = data.gsub(/&#39;/, "'")
    # &#34; is the double quote (it used to be turned into an apostrophe).
    data = data.gsub(/&#34;/, '"')
    if @encoding && !@encoding.empty? # FIXME unicode check type(u'')
      data = uconvert(data, 'utf-8', @encoding)
    end
    sgml_feed(data) # see the alias above
  end

  # Normalise numeric character references for the five XML special
  # characters to their named entities; when the current content type is
  # known and does not end in "xml", decode those entities to literal
  # characters. +data+ is modified in place and returned.
  def decodeEntities(element, data)
    data.gsub!('&#60;', '&lt;')
    data.gsub!('&#x3c;', '&lt;')
    data.gsub!('&#62;', '&gt;')
    data.gsub!('&#x3e;', '&gt;')
    data.gsub!('&#38;', '&amp;')
    data.gsub!('&#x26;', '&amp;')
    data.gsub!('&#34;', '&quot;')
    data.gsub!('&#x22;', '&quot;')
    data.gsub!('&#39;', '&apos;')
    data.gsub!('&#x27;', '&apos;')
    if @contentparams.has_key?('type') && !((@contentparams['type'] || 'xml') =~ /xml$/u)
      data.gsub!('&lt;', '<')
      data.gsub!('&gt;', '>')
      data.gsub!('&amp;', '&')
      data.gsub!('&quot;', '"')
      data.gsub!('&apos;', "'")
    end
    data
  end
end
|
3042
|
+
|
3043
|
+
# Rewrite the relative URIs found in well-known HTML attributes (href, src,
# cite, ...) so that they are resolved against baseURI.
#
# htmlSource:: HTML text, parsed with Hpricot.
# baseURI::    base handed to urljoin for every relative URI.
# encoding::   not used by this method.
#
# Returns the resulting HTML as a String. Attribute values that URI.parse
# rejects are left untouched.
def FeedParser.resolveRelativeURIs(htmlSource, baseURI, encoding)
  $stderr << "entering resolveRelativeURIs\n" if $debug # FIXME write a decent logger
  # [element name, attribute that holds a URI]
  relative_uris = [ ['a','href'],
                    ['applet','codebase'],
                    ['area','href'],
                    ['blockquote','cite'],
                    ['body','background'],
                    ['del','cite'],
                    ['form','action'],
                    ['frame','longdesc'],
                    ['frame','src'],
                    ['iframe','longdesc'],
                    ['iframe','src'],
                    ['head','profile'],
                    ['img','longdesc'],
                    ['img','src'],
                    ['img','usemap'],
                    ['input','src'],
                    ['input','usemap'],
                    ['ins','cite'],
                    ['link','href'],
                    ['object','classid'],
                    ['object','codebase'],
                    ['object','data'],
                    ['object','usemap'],
                    ['q','cite'],
                    ['script','src'],
                  ]
  h = Hpricot(htmlSource)
  relative_uris.each do |ename, eattr|
    h.search(ename).each do |elem|
      euri = elem.attributes[eattr]
      next if euri.nil? || euri.empty?
      begin
        relative = URI.parse(euri).relative?
      rescue URI::InvalidURIError
        # One malformed attribute must not abort the whole document.
        next
      end
      elem.attributes[eattr] = urljoin(baseURI, euri) if relative
    end
  end
  h.to_html
end
|
3083
|
+
|
3084
|
+
# Hpricot document that can strip itself down to the acceptable elements.
class SanitizerDoc < Hpricot::Doc

  # Sanitize the document in place and return self.
  #
  # * Elements listed in Acceptable_Elements keep their place; their
  #   attributes go through strip_attributes.
  # * Any other element is replaced by the scrubbed HTML of its children;
  #   for Unacceptable_Elements_With_End_Tag the content is dropped first.
  # * Doctype nodes are removed.
  # * In text nodes &#39; and &#34; are decoded and carriage returns removed.
  #
  # NOTE(review): the two entity patterns were reconstructed from the Python
  # feedparser 4.1 reference -- confirm against the original source.
  def scrub
    traverse_all_element do |e|
      if e.elem?
        if Acceptable_Elements.include?(e.name)
          e.strip_attributes
        else
          e.inner_html = '' if Unacceptable_Elements_With_End_Tag.include?(e.name)
          e.swap(SanitizerDoc.new(e.children).scrub.to_html)
          # This works because the children swapped in are brought in "after" the current element.
        end
      elsif e.doctype?
        e.parent.children.delete(e)
      elsif e.text?
        ets = e.to_s
        ets.gsub!(/&#39;/, "'")
        ets.gsub!(/&#34;/, '"')
        ets.gsub!(/\r/, '')
        e.swap(ets)
      end
    end
    # yes, that '/' should be there. It's a search method. See the Hpricot docs.

    unless $compatible # FIXME not properly recursive, see comment in recursive_strip
      # NOTE(review): neither `tag` nor @config is defined anywhere in this
      # class, so this branch presumably raises when $compatible is false --
      # confirm where they are meant to come from.
      (self/tag).strip_style(@config[:allow_css_properties], @config[:allow_css_keywords])
    end
    self
  end
end
|
3117
|
+
|
3118
|
+
# Build a FeedParser::SanitizerDoc from an HTML string (parsed with
# Hpricot.make). Exposed as a module function.
def SanitizerDoc(html)
  FeedParser::SanitizerDoc.new(Hpricot.make(html))
end
module_function(:SanitizerDoc)
|
3122
|
+
# Sanitize an HTML fragment with SanitizerDoc#scrub.
#
# html::     the HTML text; it is not modified.
# encoding:: not used by this method.
#
# Returns the scrubbed HTML with surrounding whitespace stripped.
def self.sanitizeHTML(html, encoding)
  # FIXME Tidy not yet supported
  # Escape "<!" unless it opens a DOCTYPE, a comment or a marked section.
  # NOTE(review): the '&lt;' in the replacement was reconstructed from the
  # Python feedparser 4.1 reference -- confirm against the original source.
  html = html.gsub(/<!((?!DOCTYPE|--|\[))/, '&lt;!\1')
  SanitizerDoc(html).scrub.to_html.strip
end
|
3129
|
+
|
3130
|
+
|
3131
|
+
|
3132
|
+
# Get the character encoding of the XML document.
#
# feed::     object whose #meta returns the HTTP headers as a Hash (as
#            open-uri results do); anything without #meta counts as
#            "no headers".
# xml_data:: the raw document; it is not modified.
#
# Returns [true_encoding, http_encoding, xml_encoding,
#          sniffed_xml_encoding, acceptable_content_type] where
# * http_encoding is the charset from the Content-Type header (or nil),
# * xml_encoding is the encoding named in the XML declaration (or nil),
# * sniffed_xml_encoding is guessed from the first bytes (or nil),
# * true_encoding is the one chosen from those and the media type,
# * acceptable_content_type is true for XML media types.
def self.getCharacterEncoding(feed, xml_data)
  $stderr << "In getCharacterEncoding\n" if $debug
  sniffed_xml_encoding = nil
  xml_encoding = nil
  true_encoding = nil

  # Headers without a content-type are kept rather than discarded, so the
  # "no Content-Type" branch at the bottom can actually be reached.
  http_headers = (feed.respond_to?(:meta) && feed.meta) || {}
  content_type = http_headers['content-type'].to_s
  http_content_type = content_type.split(';')[0]
  http_encoding = content_type.scan(/charset\s*=\s*(.*?)(?:"|')*$/).flatten[0].to_s.gsub(/("|')/, '')
  http_encoding = nil if http_encoding.empty?
  # FIXME Open-Uri returns iso8859-1 if there is no charset header,
  # but that doesn't pass the tests. Open-Uri claims its following
  # the right RFC. Are they wrong or do we need to change the tests?

  # Must sniff for non-ASCII-compatible character encodings before
  # searching for XML declaration. This heuristic is defined in
  # section F of the XML specification:
  # http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info
  begin
    # Compare raw byte values; string slices are character based (and
    # encoding sensitive) on newer Rubies. head[3] is nil for < 4 bytes.
    head = xml_data.unpack('C4')
    if head == [0x4c, 0x6f, 0xa7, 0x94]
      # EBCDIC
      xml_data = _ebcdic_to_ascii(xml_data)
    elsif head == [0x00, 0x3c, 0x00, 0x3f]
      # UTF-16BE
      sniffed_xml_encoding = 'utf-16be'
      xml_data = uconvert(xml_data, 'utf-16be', 'utf-8')
    elsif head[3] && head[0, 2] == [0xfe, 0xff] && head[2, 2] != [0x00, 0x00]
      # UTF-16BE with BOM
      sniffed_xml_encoding = 'utf-16be'
      xml_data = uconvert(xml_data.unpack('x2a*')[0], 'utf-16be', 'utf-8')
    elsif head == [0x3c, 0x00, 0x3f, 0x00]
      # UTF-16LE
      sniffed_xml_encoding = 'utf-16le'
      xml_data = uconvert(xml_data, 'utf-16le', 'utf-8')
    elsif head[3] && head[0, 2] == [0xff, 0xfe] && head[2, 2] != [0x00, 0x00]
      # UTF-16LE with BOM
      sniffed_xml_encoding = 'utf-16le'
      xml_data = uconvert(xml_data.unpack('x2a*')[0], 'utf-16le', 'utf-8')
    elsif head == [0x00, 0x00, 0x00, 0x3c]
      # UTF-32BE
      sniffed_xml_encoding = 'utf-32be'
      xml_data = uconvert(xml_data, 'utf-32be', 'utf-8')
    elsif head == [0x3c, 0x00, 0x00, 0x00]
      # UTF-32LE
      sniffed_xml_encoding = 'utf-32le'
      xml_data = uconvert(xml_data, 'utf-32le', 'utf-8')
    elsif head == [0x00, 0x00, 0xfe, 0xff]
      # UTF-32BE with BOM
      sniffed_xml_encoding = 'utf-32be'
      xml_data = uconvert(xml_data.unpack('x4a*')[0], 'utf-32BE', 'utf-8')
    elsif head == [0xff, 0xfe, 0x00, 0x00]
      # UTF-32LE with BOM
      sniffed_xml_encoding = 'utf-32le'
      xml_data = uconvert(xml_data.unpack('x4a*')[0], 'utf-32le', 'utf-8')
    elsif head[0, 3] == [0xef, 0xbb, 0xbf]
      # UTF-8 with BOM
      sniffed_xml_encoding = 'utf-8'
      xml_data = xml_data.unpack('x3a*')[0]
    else
      # ASCII-compatible
    end
    xml_encoding_match = /^<\?.*encoding=[\'"](.*?)[\'"].*\?>/.match(xml_data)
  rescue
    # Best effort: a failed conversion just means "no declaration found".
    xml_encoding_match = nil
  end

  if xml_encoding_match
    xml_encoding = xml_encoding_match[1].downcase
    # Declarations that only name a family: trust the sniffed byte order.
    xencodings = ['iso-10646-ucs-2', 'ucs-2', 'csunicode', 'iso-10646-ucs-4', 'ucs-4', 'csucs4', 'utf-16', 'utf-32', 'utf_16', 'utf_32', 'utf16', 'u16']
    if sniffed_xml_encoding && xencodings.include?(xml_encoding)
      xml_encoding = sniffed_xml_encoding
    end
  end

  acceptable_content_type = false
  application_content_types = ['application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity']
  text_content_types = ['text/xml', 'text/xml-external-parsed-entity']

  if application_content_types.include?(http_content_type) || (/^application\// =~ http_content_type && /\+xml$/ =~ http_content_type)
    acceptable_content_type = true
    true_encoding = http_encoding || xml_encoding || 'utf-8'
  elsif text_content_types.include?(http_content_type) || (/^text\// =~ http_content_type && /\+xml$/ =~ http_content_type)
    acceptable_content_type = true
    true_encoding = http_encoding || 'us-ascii'
  elsif /^text\// =~ http_content_type
    true_encoding = http_encoding || 'us-ascii'
  elsif !http_headers.empty? && !http_headers.has_key?('content-type')
    true_encoding = xml_encoding || 'iso-8859-1'
  else
    true_encoding = xml_encoding || 'utf-8'
  end
  return true_encoding, http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type
end
|
3230
|
+
|
3231
|
+
# Changes an XML data stream on the fly to specify a new encoding.
#
# data::     a raw sequence of bytes (not Unicode) that is presumed to be in
#            +encoding+ already; it is not modified.
# encoding:: a String naming the source encoding, passed to uconvert. A byte
#            order mark at the start of data overrides it.
#
# Returns the data converted to UTF-8, with its XML declaration replaced by
# (or, if it had none, prefixed with) one that says encoding='utf-8'.
# Any error raised by uconvert propagates to the caller.
def self.toUTF8(data, encoding)
  $stderr << "entering self.toUTF8, trying encoding %s\n" % encoding if $debug

  # Compare raw byte values; string slices are character based (and
  # encoding sensitive) on newer Rubies. head[3] is nil for < 4 bytes.
  head = data.unpack('C4')
  bom_encoding, bom_size =
    if head[3] && head[0, 2] == [0xfe, 0xff] && head[2, 2] != [0x00, 0x00]
      ['utf-16be', 2]
    elsif head[3] && head[0, 2] == [0xff, 0xfe] && head[2, 2] != [0x00, 0x00]
      ['utf-16le', 2]
    elsif head[0, 3] == [0xef, 0xbb, 0xbf]
      ['utf-8', 3]
    elsif head == [0x00, 0x00, 0xfe, 0xff]
      ['utf-32be', 4]
    elsif head == [0xff, 0xfe, 0x00, 0x00]
      ['utf-32le', 4]
    end

  if bom_encoding
    if $debug
      $stderr << "stripping BOM\n"
      $stderr << "trying #{bom_encoding} instead\n" if encoding != bom_encoding
    end
    encoding = bom_encoding
    data = data.unpack("x#{bom_size}a*")[0] # everything after the BOM bytes
  end

  # A failed conversion must reach the caller, which moves on to the next
  # candidate encoding; it is deliberately not rescued here.
  newdata = uconvert(data, encoding, 'utf-8')
  $stderr << "successfully converted #{encoding} data to utf-8\n" if $debug

  declmatch = /^<\?xml[^>]*?>/
  newdecl = "<?xml version='1.0' encoding='utf-8'?>"
  if declmatch =~ newdata
    newdata.sub(declmatch, newdecl)
  else
    newdecl + "\n" + newdata
  end
end
|
3296
|
+
|
3297
|
+
# Strips DOCTYPE from XML document, returns [rss_version, stripped_data].
#
# rss_version is 'rss091n' when the DOCTYPE mentions "netscape" (any case),
# otherwise nil. stripped_data is the same XML document minus every
# <!ENTITY ...> declaration and the first DOCTYPE. +data+ is not modified.
def self.stripDoctype(data)
  # m is for Regexp::MULTILINE
  data = data.gsub(/<!ENTITY(.*?)>/m, '')

  doctype_pattern = /<!DOCTYPE(.*?)>/m
  doctype = data[doctype_pattern, 1] || ''
  version = doctype.downcase.include?('netscape') ? 'rss091n' : nil

  return version, data.sub(doctype_pattern, '')
end
|
3323
|
+
|
3324
|
+
# Convenience wrapper: forwards every argument to FeedParser.parse and
# returns its result.
def parse(*args)
  FeedParser.parse(*args)
end
|
3325
|
+
# Parse a feed from a URL, file, stream or string.
#
# furi::    location handed to URI.parse and then to open.
# options:: optional settings read here: :compatible, :modified, :etag,
#           :agent, :referrer, :content_location, :content_language and
#           :content_type. The caller's object is not modified.
#
# Returns a FeedParserDict with 'feed', 'entries', 'bozo', 'encoding',
# 'version', 'namespaces' and 'headers', plus 'etag', 'modified', 'url' and
# 'status' when the source supplies them. Download, encoding and XML
# problems are not raised: they set 'bozo' to true and 'bozo_exception'.
def FeedParser.parse(furi, options={})
  options = options.dup
  $compatible = options[:compatible] || $compatible # Use the default compatibility if compatible is nil
  result = FeedParserDict.new
  result['feed'] = FeedParserDict.new
  result['entries'] = []
  if options[:modified]
    options[:modified] = Time.parse(options[:modified]).rfc2822
    # FIXME this ignores all of our time parsing work. Does it matter?
  end
  result['bozo'] = false

  f = nil
  begin
    if URI::parse(furi).class == URI::Generic
      f = open(furi) # OpenURI doesn't behave well when passing HTTP options to a file.
    else
      # And when you do pass them, make sure they aren't just nil (this still true?)
      newd = {}
      newd["If-None-Match"] = options[:etag] unless options[:etag].nil?
      newd["If-Modified-Since"] = options[:modified] unless options[:modified].nil?
      newd["User-Agent"] = (options[:agent] || USER_AGENT).to_s
      newd["Referer"] = options[:referrer] unless options[:referrer].nil?
      newd["Content-Location"] = options[:content_location] unless options[:content_location].nil?
      newd["Content-Language"] = options[:content_language] unless options[:content_language].nil?
      newd["Content-type"] = options[:content_type] unless options[:content_type].nil?

      f = open(furi, newd)
    end

    begin
      data = f.read
    ensure
      f.close # also when reading fails
    end
  rescue => e
    $stderr << "Rescued in parse: "+e.to_s+"\n" if $debug # My addition
    result['bozo'] = true
    result['bozo_exception'] = e
    data = ''
    f = nil
  end

  if f.respond_to?(:meta) && f.meta
    # Fetched through open-uri: take the metadata from the response.
    status = f.status
    result['etag'] = options[:etag] || f.meta['etag']
    result['modified'] = options[:modified] || f.last_modified
    result['url'] = f.base_uri.to_s
    result['status'] = (status && status[0]) || 200
    result['headers'] = f.meta
    result['headers']['content-location'] ||= options[:content_location] unless options[:content_location].nil?
    result['headers']['content-language'] ||= options[:content_language] unless options[:content_language].nil?
    result['headers']['content-type'] ||= options[:content_type] unless options[:content_type].nil?
  else
    # Local file or failed download: the only headers are the caller's.
    result['headers'] = {}
    result['etag'] = result['headers']['etag'] = options[:etag] unless options[:etag].nil?
    result['modified'] = result['headers']['last-modified'] = options[:modified] unless options[:modified].nil?
    unless options[:content_location].nil?
      result['headers']['content-location'] = options[:content_location]
    end
    unless options[:content_language].nil?
      result['headers']['content-language'] = options[:content_language]
    end
    unless options[:content_type].nil?
      result['headers']['content-type'] = options[:content_type]
    end
  end

  # there are four encodings to keep track of:
  # - http_encoding is the encoding declared in the Content-Type HTTP header
  # - xml_encoding is the encoding declared in the <?xml declaration
  # - sniffed_encoding is the encoding sniffed from the first 4 bytes of the XML data
  # - result['encoding'] is the actual encoding, as per RFC 3023 and a variety of other conflicting specifications
  http_headers = result['headers']
  result['encoding'], http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type =
    self.getCharacterEncoding(f, data)

  if !http_headers.empty? && !acceptable_content_type
    if http_headers.has_key?('content-type')
      bozo_message = "#{http_headers['content-type']} is not an XML media type"
    else
      bozo_message = 'no Content-type specified'
    end
    result['bozo'] = true
    result['bozo_exception'] = NonXMLContentType.new(bozo_message) # I get to care about this, cuz Mark says I should.
  end
  result['version'], data = self.stripDoctype(data)
  # NOTE(review): result['href'] is never assigned in this method (the URL is
  # stored under 'url') -- confirm FeedParserDict maps one onto the other.
  baseuri = http_headers['content-location'] || result['href']
  baselang = http_headers['content-language']

  # if server sent 304, we're done
  if result['status'] == 304
    result['version'] = ''
    result['debug_message'] = "The feed has not changed since you last checked, " +
    "so the server sent no data. This is a feature, not a bug!"
    return result
  end

  # if there was a problem downloading, we're done
  if data.nil? || data.empty?
    return result
  end

  # determine character encoding
  use_strict_parser = false
  known_encoding = false
  tried_encodings = []
  proposed_encoding = nil
  # try: HTTP encoding, declared XML encoding, encoding sniffed from BOM
  # The block parameter has its own name: a block parameter is block-local
  # on Ruby >= 1.9 and would not update proposed_encoding.
  [result['encoding'], xml_encoding, sniffed_xml_encoding].each do |candidate|
    next if candidate.nil? || candidate.empty?
    next if tried_encodings.include? candidate
    tried_encodings << candidate
    begin
      data = self.toUTF8(data, candidate)
      proposed_encoding = candidate
      known_encoding = use_strict_parser = true
      break
    rescue
      # conversion failed: try the next candidate
    end
  end
  # if no luck and we have auto-detection library, try that
  if !known_encoding && $chardet
    begin
      proposed_encoding = CharDet.detect(data)['encoding']
      if proposed_encoding && !tried_encodings.include?(proposed_encoding)
        tried_encodings << proposed_encoding
        data = self.toUTF8(data, proposed_encoding)
        known_encoding = use_strict_parser = true
      end
    rescue
    end
  end

  # if still no luck and we haven't tried utf-8 yet, try that
  if !known_encoding && !tried_encodings.include?('utf-8')
    begin
      proposed_encoding = 'utf-8'
      tried_encodings << proposed_encoding
      data = self.toUTF8(data, proposed_encoding)
      known_encoding = use_strict_parser = true
    rescue
    end
  end
  # if still no luck and we haven't tried windows-1252 yet, try that
  if !known_encoding && !tried_encodings.include?('windows-1252')
    begin
      proposed_encoding = 'windows-1252'
      tried_encodings << proposed_encoding
      data = self.toUTF8(data, proposed_encoding)
      known_encoding = use_strict_parser = true
    rescue
    end
  end

  # NOTE this isn't in FeedParser.py 4.1
  # if still no luck and we haven't tried iso-8859-2 yet, try that.
  #if not known_encoding and not tried_encodings.include?'iso-8859-2'
  #  begin
  #    proposed_encoding = 'iso-8859-2'
  #    tried_encodings << proposed_encoding
  #    data = self.toUTF8(data, proposed_encoding)
  #    known_encoding = use_strict_parser = true
  #  rescue
  #  end
  #end

  # if still no luck, give up
  if !known_encoding
    result['bozo'] = true
    result['bozo_exception'] = CharacterEncodingUnknown.new("document encoding unknown, I tried #{result['encoding']}, #{xml_encoding}, utf-8 and windows-1252 but nothing worked")
    result['encoding'] = ''
  elsif proposed_encoding != result['encoding']
    result['bozo'] = true
    result['bozo_exception'] = CharacterEncodingOverride.new("documented declared as #{result['encoding']}, but parsed as #{proposed_encoding}")
    result['encoding'] = proposed_encoding
  end

  if use_strict_parser
    # initialize the SAX parser
    saxparser = XML::SAX::Helpers::ParserFactory.makeParser("XML::Parser::SAXDriver")
    feedparser = StrictFeedParser.new(baseuri, baselang, 'utf-8')
    saxparser.setDocumentHandler(feedparser)
    saxparser.setDTDHandler(feedparser)
    saxparser.setEntityResolver(feedparser)
    saxparser.setErrorHandler(feedparser)

    inputdata = XML::SAX::InputSource.new('parsedfeed')
    inputdata.setByteStream(StringIO.new(data))
    begin
      saxparser.parse(inputdata)
    # NOTE(review): kept as Exception because the SAX driver's error classes
    # may not descend from StandardError -- confirm before narrowing.
    rescue Exception => parseerr # resparse
      if $debug
        $stderr << "xml parsing failed\n"
        $stderr << parseerr.to_s+"\n" # Hrmph.
      end
      result['bozo'] = true
      result['bozo_exception'] = feedparser.exc || parseerr
      use_strict_parser = false
    end
  end
  if !use_strict_parser
    # Either no encoding worked or strict parsing failed: parse loosely.
    feedparser = LooseFeedParser.new(baseuri, baselang, (known_encoding ? 'utf-8' : ''))
    feedparser.parse(data)
    $stderr << "Using LooseFeed\n\n" if $debug
  end
  result['feed'] = feedparser.feeddata
  result['entries'] = feedparser.entries
  result['version'] = result['version'] || feedparser.version
  result['namespaces'] = feedparser.namespacesInUse
  return result
end
|
3541
|
+
end # End FeedParser module
|
3542
|
+
|
3543
|
+
# Base class for the command-line output formats: holds the parse results.
class Serializer
  def initialize(results)
    @results = results
  end
end

# Writes the results as flat "dotted.path[index].key=value" lines.
class TextSerializer < Serializer
  # Write every value in the results to +stream+ (anything that takes <<).
  def write(stream=$stdout)
    writer(stream, @results, '')
  end

  # Recursively write +node+ under the dotted path +prefix+.
  #
  # * nil and empty nodes produce no output.
  # * Hash-like nodes are walked in key order; 'description' and 'link' are
  #   skipped, as is any key that has a "<key>_detail" or "<key>_parsed"
  #   sibling.
  # * Arrays are walked with an "[index]" suffix on the path.
  # * Anything else is written as "path=value" on its own line.
  def writer(stream, node, prefix)
    # Scalars (Integer, true, ...) have no empty?; only ask those that do.
    return if node.nil? || (node.respond_to?(:empty?) && node.empty?)
    if node.respond_to?(:keys)
      node.keys.sort.each do |k|
        next if ['description', 'link'].include? k
        next if node.has_key? k + '_detail'
        next if node.has_key? k + '_parsed'
        writer(stream, node[k], prefix + k + '.')
      end
    elsif node.class == Array
      node.each_with_index do |thing, index|
        writer(stream, thing, prefix[0..-2] + '[' + index.to_s + '].')
      end
    else
      begin
        s = u(node.to_s)
        stream << prefix[0..-2]
        stream << '='
        stream << s
        stream << "\n"
      rescue
        # Best effort: a value that cannot be converted is left out.
      end
    end
  end
end

# Writes the results' 'href' followed by a pretty-printed dump.
class PprintSerializer < Serializer # FIXME ? use pp instead?
  def write(stream = $stdout)
    stream << @results['href'].to_s + "\n\n"
    # Print to the requested stream, not unconditionally to $stdout.
    PP.pp(@results, stream)
    stream << "\n"
  end
end
|
3587
|
+
|
3588
|
+
|
3589
|
+
require 'optparse'
|
3590
|
+
require 'ostruct'
|
3591
|
+
# Command-line driver: read the options below, then run FeedParser.parse on
# every remaining command-line argument and print each result with the
# selected serializer.
options = OpenStruct.new
options.etag = options.modified = options.agent = options.referrer = nil
options.content_language = options.content_location = options.ctype = nil
options.format = :pprint # Symbol, matching the values the -f option yields
options.compatible = $compatible
options.verbose = false

opts = OptionParser.new do |parser|
  parser.banner = "Usage: #{parser.program_name} [options] URL..."
  parser.separator ""
  parser.on("-A", "--user-agent [AGENT]",
            "User-Agent for HTTP URLs") {|agent|
    options.agent = agent
  }

  parser.on("-e", "--referrer [URL]",
            "Referrer for HTTP URLs") {|referrer|
    options.referrer = referrer
  }

  parser.on("-t", "--etag [TAG]",
            "ETag/If-None-Match for HTTP URLs") {|etag|
    options.etag = etag
  }

  parser.on("-m", "--last-modified [DATE]",
            "Last-modified/If-Modified-Since for HTTP URLs (any supported date format)") {|modified|
    options.modified = modified
  }

  parser.on("-f", "--format [FORMAT]", [:text, :pprint],
            "output results in FORMAT (text, pprint)") {|format|
    options.format = format
  }

  parser.on("-v", "--[no-]verbose",
            "write debugging information to stderr") {|v|
    options.verbose = v
  }

  parser.on("-c", "--[no-]compatible",
            "strip element attributes like feedparser.py 4.1 (default)") {|comp|
    options.compatible = comp
  }

  parser.on("-l", "--content-location [LOCATION]",
            "default Content-Location HTTP header") {|loc|
    options.content_location = loc
  }

  parser.on("-a", "--content-language [LANG]",
            "default Content-Language HTTP header") {|lang|
    options.content_language = lang
  }

  # Uses -T because -t is already taken by --etag; registering the same short
  # flag twice makes the later definition silently take over.
  parser.on("-T", "--content-type [TYPE]",
            "default Content-type HTTP header") {|ctype|
    options.ctype = ctype
  }
end

# parse! removes the recognized options from ARGV, leaving only the URLs.
opts.parse!(ARGV)
$debug = true if options.verbose
$compatible = options.compatible unless options.compatible.nil?

# Anything other than :text (including a bare -f with no value) falls back to
# the pretty-printer.
serializer = options.format == :text ? TextSerializer : PprintSerializer

ARGV.each do |url|
  results = FeedParser.parse(url, :etag => options.etag,
                             :modified => options.modified,
                             :agent => options.agent,
                             :referrer => options.referrer,
                             :content_location => options.content_location,
                             :content_language => options.content_language,
                             :content_type => options.ctype
                            )
  serializer.new(results).write($stdout)
end
|