microformats 0.3 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/Guardfile +5 -0
- data/LICENSE.md +36 -0
- data/README.md +228 -0
- data/Rakefile +7 -25
- data/lib/microformats.rb +25 -9
- data/lib/microformats/absolute_uri.rb +30 -0
- data/lib/microformats/format_parser.rb +343 -0
- data/lib/microformats/parser.rb +105 -0
- data/lib/microformats/parser_core.rb +343 -0
- data/lib/microformats/property_parser.rb +122 -0
- data/lib/microformats/results/collection.rb +121 -0
- data/lib/microformats/results/parser_result.rb +111 -0
- data/lib/microformats/results/property_set.rb +87 -0
- data/lib/microformats/time_property_parser.rb +161 -0
- data/lib/microformats/version.rb +3 -0
- data/microformats.gemspec +37 -0
- data/spec/lib/microformats/absolute_uri_spec.rb +48 -0
- data/spec/lib/microformats/parser_spec.rb +72 -0
- data/spec/lib/microformats_spec.rb +33 -0
- data/spec/spec_helper.rb +10 -0
- data/spec/support/cases/microformats.org/microformats-2/microformats-2-0.html +2 -0
- data/spec/support/cases/microformats.org/microformats-2/microformats-2-0.js +9 -0
- data/spec/support/cases/microformats.org/microformats-2/microformats-2-1.html +2 -0
- data/spec/support/cases/microformats.org/microformats-2/microformats-2-1.js +10 -0
- data/spec/support/cases/microformats.org/microformats-2/microformats-2-2.html +5 -0
- data/spec/support/cases/microformats.org/microformats-2/microformats-2-2.js +11 -0
- data/spec/support/cases/microformats.org/microformats-2/microformats-2-3.html +17 -0
- data/spec/support/cases/microformats.org/microformats-2/microformats-2-3.js +20 -0
- data/spec/support/cases/microformats.org/microformats-2/microformats-2-4.html +16 -0
- data/spec/support/cases/microformats.org/microformats-2/microformats-2-4.js +24 -0
- data/spec/support/cases/microformats.org/microformats-2/microformats-2-5.html +7 -0
- data/spec/support/cases/microformats.org/microformats-2/microformats-2-5.js +11 -0
- data/spec/support/cases/microformats.org/microformats-2/microformats-2-6.html +9 -0
- data/spec/support/cases/microformats.org/microformats-2/microformats-2-6.js +18 -0
- data/spec/support/cases/microformats.org/microformats-2/microformats-2-7.html +9 -0
- data/spec/support/cases/microformats.org/microformats-2/microformats-2-7.js +18 -0
- data/spec/support/cases/microformats.org/microformats-2/microformats-2-8.html +9 -0
- data/spec/support/cases/microformats.org/microformats-2/microformats-2-8.js +17 -0
- data/spec/support/cases/microformats.org/microformats-2/microformats-2-9.html +9 -0
- data/spec/support/cases/microformats.org/microformats-2/microformats-2-9.js +17 -0
- data/spec/support/lib/microformats/implied_property/name-fail.html +60 -0
- data/spec/support/lib/microformats/implied_property/name-pass.html +30 -0
- data/spec/support/lib/microformats/implied_property/photo-fail.html +60 -0
- data/spec/support/lib/microformats/implied_property/photo-pass.html +30 -0
- data/spec/support/lib/microformats/implied_property/url-fail.html +18 -0
- data/spec/support/lib/microformats/implied_property/url-pass.html +12 -0
- data/spec/support/lib/microformats/implied_property/url-relative.html +13 -0
- data/spec/support/lib/microformats/implied_property/url-unnormalized.html +12 -0
- data/spec/support/lib/microformats/nested-format-with-property-of-same-name.html +13 -0
- data/spec/support/lib/microformats/nested-format-with-property-of-same-name.js +16 -0
- data/spec/support/lib/microformats/nested-format-with-property.html +12 -0
- data/spec/support/lib/microformats/nested-format-with-property.js +16 -0
- data/spec/support/lib/microformats/nested-format-without-property.html +13 -0
- data/spec/support/lib/microformats/nested-property.html +12 -0
- data/spec/support/lib/microformats/nested-property.js +10 -0
- data/spec/support/lib/microformats/rels-that-drop-the-base.html +11 -0
- data/spec/support/lib/microformats/rels-with-unnormalized-urls.html +11 -0
- data/spec/support/lib/microformats/rels.html +12 -0
- data/spec/support/lib/microformats/rels.js +13 -0
- data/spec/support/lib/microformats/simple.html +23 -0
- data/spec/support/lib/microformats/simple.js +11 -0
- data/vendor/tests/.gitignore +25 -0
- data/vendor/tests/LICENSE.md +36 -0
- data/vendor/tests/README.md +48 -0
- data/vendor/tests/app.js +84 -0
- data/vendor/tests/composer.json +5 -0
- data/vendor/tests/css/testsuite.css +159 -0
- data/vendor/tests/interface.js +18 -0
- data/vendor/tests/package.json +27 -0
- data/vendor/tests/tests/microformats-mixed/h-card/change-log.html +63 -0
- data/vendor/tests/tests/microformats-mixed/h-card/mixedpropertries.html +14 -0
- data/vendor/tests/tests/microformats-mixed/h-card/mixedpropertries.json +22 -0
- data/vendor/tests/tests/microformats-mixed/h-card/tworoots.html +1 -0
- data/vendor/tests/tests/microformats-mixed/h-card/tworoots.json +10 -0
- data/vendor/tests/tests/microformats-mixed/h-entry/mixedroots.html +16 -0
- data/vendor/tests/tests/microformats-mixed/h-entry/mixedroots.json +38 -0
- data/vendor/tests/tests/microformats-mixed/h-resume/change-log.html +68 -0
- data/vendor/tests/tests/microformats-mixed/h-resume/mixedroots.html +16 -0
- data/vendor/tests/tests/microformats-mixed/h-resume/mixedroots.json +31 -0
- data/vendor/tests/tests/microformats-v1/adr/change-log.html +63 -0
- data/vendor/tests/tests/microformats-v1/adr/simpleproperties.html +8 -0
- data/vendor/tests/tests/microformats-v1/adr/simpleproperties.json +15 -0
- data/vendor/tests/tests/microformats-v1/geo/abbrpattern.html +5 -0
- data/vendor/tests/tests/microformats-v1/geo/abbrpattern.json +11 -0
- data/vendor/tests/tests/microformats-v1/geo/change-log.1.html +78 -0
- data/vendor/tests/tests/microformats-v1/geo/change-log.html +63 -0
- data/vendor/tests/tests/microformats-v1/geo/hidden.html +10 -0
- data/vendor/tests/tests/microformats-v1/geo/hidden.json +11 -0
- data/vendor/tests/tests/microformats-v1/geo/simpleproperties.html +6 -0
- data/vendor/tests/tests/microformats-v1/geo/simpleproperties.json +11 -0
- data/vendor/tests/tests/microformats-v1/geo/valuetitleclass.html +11 -0
- data/vendor/tests/tests/microformats-v1/geo/valuetitleclass.json +11 -0
- data/vendor/tests/tests/microformats-v1/hcalendar/ampm.html +41 -0
- data/vendor/tests/tests/microformats-v1/hcalendar/ampm.json +21 -0
- data/vendor/tests/tests/microformats-v1/hcalendar/attendees.html +13 -0
- data/vendor/tests/tests/microformats-v1/hcalendar/attendees.json +37 -0
- data/vendor/tests/tests/microformats-v1/hcalendar/change-log.html +68 -0
- data/vendor/tests/tests/microformats-v1/hcalendar/combining.html +15 -0
- data/vendor/tests/tests/microformats-v1/hcalendar/combining.json +31 -0
- data/vendor/tests/tests/microformats-v1/hcalendar/concatenate.html +7 -0
- data/vendor/tests/tests/microformats-v1/hcalendar/concatenate.json +12 -0
- data/vendor/tests/tests/microformats-v1/hcalendar/time.html +44 -0
- data/vendor/tests/tests/microformats-v1/hcalendar/time.json +22 -0
- data/vendor/tests/tests/microformats-v1/hcard/change-log.html +68 -0
- data/vendor/tests/tests/microformats-v1/hcard/email.html +14 -0
- data/vendor/tests/tests/microformats-v1/hcard/email.json +11 -0
- data/vendor/tests/tests/microformats-v1/hcard/format.html +6 -0
- data/vendor/tests/tests/microformats-v1/hcard/format.json +11 -0
- data/vendor/tests/tests/microformats-v1/hcard/hyperlinkedphoto.html +3 -0
- data/vendor/tests/tests/microformats-v1/hcard/hyperlinkedphoto.json +8 -0
- data/vendor/tests/tests/microformats-v1/hcard/justahyperlink.html +1 -0
- data/vendor/tests/tests/microformats-v1/hcard/justahyperlink.json +8 -0
- data/vendor/tests/tests/microformats-v1/hcard/justaname.html +1 -0
- data/vendor/tests/tests/microformats-v1/hcard/justaname.json +8 -0
- data/vendor/tests/tests/microformats-v1/hcard/multiple.html +74 -0
- data/vendor/tests/tests/microformats-v1/hcard/multiple.json +65 -0
- data/vendor/tests/tests/microformats-v1/hcard/name.html +11 -0
- data/vendor/tests/tests/microformats-v1/hcard/name.json +15 -0
- data/vendor/tests/tests/microformats-v1/hcard/single.html +14 -0
- data/vendor/tests/tests/microformats-v1/hcard/single.json +24 -0
- data/vendor/tests/tests/microformats-v1/hentry/change-log.html +73 -0
- data/vendor/tests/tests/microformats-v1/hentry/summarycontent.html +20 -0
- data/vendor/tests/tests/microformats-v1/hentry/summarycontent.json +24 -0
- data/vendor/tests/tests/microformats-v1/hfeed/simple.html +30 -0
- data/vendor/tests/tests/microformats-v1/hfeed/simple.json +49 -0
- data/vendor/tests/tests/microformats-v1/hnews/all.html +37 -0
- data/vendor/tests/tests/microformats-v1/hnews/all.json +74 -0
- data/vendor/tests/tests/microformats-v1/hnews/change-log.html +72 -0
- data/vendor/tests/tests/microformats-v1/hnews/minimum.html +25 -0
- data/vendor/tests/tests/microformats-v1/hnews/minimum.json +48 -0
- data/vendor/tests/tests/microformats-v1/hproduct/aggregate.html +26 -0
- data/vendor/tests/tests/microformats-v1/hproduct/aggregate.json +52 -0
- data/vendor/tests/tests/microformats-v1/hproduct/change-log.html +62 -0
- data/vendor/tests/tests/microformats-v1/hproduct/simpleproperties.html +13 -0
- data/vendor/tests/tests/microformats-v1/hproduct/simpleproperties.json +33 -0
- data/vendor/tests/tests/microformats-v1/hresume/affiliation.html +12 -0
- data/vendor/tests/tests/microformats-v1/hresume/affiliation.json +25 -0
- data/vendor/tests/tests/microformats-v1/hresume/change-log.html +73 -0
- data/vendor/tests/tests/microformats-v1/hresume/contact.html +18 -0
- data/vendor/tests/tests/microformats-v1/hresume/contact.json +32 -0
- data/vendor/tests/tests/microformats-v1/hresume/education.html +13 -0
- data/vendor/tests/tests/microformats-v1/hresume/education.json +29 -0
- data/vendor/tests/tests/microformats-v1/hresume/skill.html +12 -0
- data/vendor/tests/tests/microformats-v1/hresume/skill.json +33 -0
- data/vendor/tests/tests/microformats-v1/hresume/work.html +16 -0
- data/vendor/tests/tests/microformats-v1/hresume/work.json +30 -0
- data/vendor/tests/tests/microformats-v1/hreview-aggregate/change-log.html +67 -0
- data/vendor/tests/tests/microformats-v1/hreview-aggregate/hcard.html +18 -0
- data/vendor/tests/tests/microformats-v1/hreview-aggregate/hcard.json +31 -0
- data/vendor/tests/tests/microformats-v1/hreview-aggregate/justahyperlink.html +6 -0
- data/vendor/tests/tests/microformats-v1/hreview-aggregate/justahyperlink.json +19 -0
- data/vendor/tests/tests/microformats-v1/hreview-aggregate/vevent.html +13 -0
- data/vendor/tests/tests/microformats-v1/hreview-aggregate/vevent.json +22 -0
- data/vendor/tests/tests/microformats-v1/hreview/change-log.html +73 -0
- data/vendor/tests/tests/microformats-v1/hreview/item.html +8 -0
- data/vendor/tests/tests/microformats-v1/hreview/item.json +19 -0
- data/vendor/tests/tests/microformats-v1/hreview/vcard.html +23 -0
- data/vendor/tests/tests/microformats-v1/hreview/vcard.json +58 -0
- data/vendor/tests/tests/microformats-v1/includes/change-log.html +72 -0
- data/vendor/tests/tests/microformats-v1/includes/hcarditemref.html +16 -0
- data/vendor/tests/tests/microformats-v1/includes/hcarditemref.json +49 -0
- data/vendor/tests/tests/microformats-v1/includes/heventitemref.html +25 -0
- data/vendor/tests/tests/microformats-v1/includes/heventitemref.json +33 -0
- data/vendor/tests/tests/microformats-v1/includes/hyperlink.html +18 -0
- data/vendor/tests/tests/microformats-v1/includes/hyperlink.json +43 -0
- data/vendor/tests/tests/microformats-v1/includes/object.html +23 -0
- data/vendor/tests/tests/microformats-v1/includes/object.json +42 -0
- data/vendor/tests/tests/microformats-v1/includes/table.html +12 -0
- data/vendor/tests/tests/microformats-v1/includes/table.json +19 -0
- data/vendor/tests/tests/microformats-v2/h-adr/change-log.html +62 -0
- data/vendor/tests/tests/microformats-v2/h-adr/geo.html +10 -0
- data/vendor/tests/tests/microformats-v2/h-adr/geo.json +16 -0
- data/vendor/tests/tests/microformats-v2/h-adr/geourl.html +4 -0
- data/vendor/tests/tests/microformats-v2/h-adr/geourl.json +13 -0
- data/vendor/tests/tests/microformats-v2/h-adr/justaname.html +1 -0
- data/vendor/tests/tests/microformats-v2/h-adr/justaname.json +10 -0
- data/vendor/tests/tests/microformats-v2/h-adr/lettercase.html +21 -0
- data/vendor/tests/tests/microformats-v2/h-adr/lettercase.json +12 -0
- data/vendor/tests/tests/microformats-v2/h-adr/simpleproperties.html +8 -0
- data/vendor/tests/tests/microformats-v2/h-adr/simpleproperties.json +16 -0
- data/vendor/tests/tests/microformats-v2/h-as-note/note.html +56 -0
- data/vendor/tests/tests/microformats-v2/h-as-note/note.json +98 -0
- data/vendor/tests/tests/microformats-v2/h-card/baseurl.html +6 -0
- data/vendor/tests/tests/microformats-v2/h-card/baseurl.json +26 -0
- data/vendor/tests/tests/microformats-v2/h-card/change-log.html +100 -0
- data/vendor/tests/tests/microformats-v2/h-card/childimplied.html +7 -0
- data/vendor/tests/tests/microformats-v2/h-card/childimplied.json +12 -0
- data/vendor/tests/tests/microformats-v2/h-card/extendeddescription.html +10 -0
- data/vendor/tests/tests/microformats-v2/h-card/extendeddescription.json +15 -0
- data/vendor/tests/tests/microformats-v2/h-card/hcard.html +4 -0
- data/vendor/tests/tests/microformats-v2/h-card/hcard.json +19 -0
- data/vendor/tests/tests/microformats-v2/h-card/horghcard.html +4 -0
- data/vendor/tests/tests/microformats-v2/h-card/horghcard.json +19 -0
- data/vendor/tests/tests/microformats-v2/h-card/hyperlinkedphoto.html +3 -0
- data/vendor/tests/tests/microformats-v2/h-card/hyperlinkedphoto.json +12 -0
- data/vendor/tests/tests/microformats-v2/h-card/impliedname.html +15 -0
- data/vendor/tests/tests/microformats-v2/h-card/impliedname.json +90 -0
- data/vendor/tests/tests/microformats-v2/h-card/impliedphoto.html +11 -0
- data/vendor/tests/tests/microformats-v2/h-card/impliedphoto.json +72 -0
- data/vendor/tests/tests/microformats-v2/h-card/impliedurl.html +5 -0
- data/vendor/tests/tests/microformats-v2/h-card/impliedurl.json +45 -0
- data/vendor/tests/tests/microformats-v2/h-card/justahyperlink.html +1 -0
- data/vendor/tests/tests/microformats-v2/h-card/justahyperlink.json +11 -0
- data/vendor/tests/tests/microformats-v2/h-card/justaname.html +1 -0
- data/vendor/tests/tests/microformats-v2/h-card/justaname.json +10 -0
- data/vendor/tests/tests/microformats-v2/h-card/nested.html +4 -0
- data/vendor/tests/tests/microformats-v2/h-card/nested.json +18 -0
- data/vendor/tests/tests/microformats-v2/h-card/p-property.html +21 -0
- data/vendor/tests/tests/microformats-v2/h-card/p-property.json +15 -0
- data/vendor/tests/tests/microformats-v2/h-card/relativeurls.html +6 -0
- data/vendor/tests/tests/microformats-v2/h-card/relativeurls.json +29 -0
- data/vendor/tests/tests/microformats-v2/h-entry/change-log.html +90 -0
- data/vendor/tests/tests/microformats-v2/h-entry/encoding.html +3 -0
- data/vendor/tests/tests/microformats-v2/h-entry/encoding.json +14 -0
- data/vendor/tests/tests/microformats-v2/h-entry/impliedvalue-nested.html +9 -0
- data/vendor/tests/tests/microformats-v2/h-entry/impliedvalue-nested.json +27 -0
- data/vendor/tests/tests/microformats-v2/h-entry/justahyperlink.html +1 -0
- data/vendor/tests/tests/microformats-v2/h-entry/justahyperlink.json +11 -0
- data/vendor/tests/tests/microformats-v2/h-entry/justaname.html +1 -0
- data/vendor/tests/tests/microformats-v2/h-entry/justaname.json +10 -0
- data/vendor/tests/tests/microformats-v2/h-entry/scriptstyletags.html +4 -0
- data/vendor/tests/tests/microformats-v2/h-entry/scriptstyletags.json +14 -0
- data/vendor/tests/tests/microformats-v2/h-entry/summarycontent.html +20 -0
- data/vendor/tests/tests/microformats-v2/h-entry/summarycontent.json +25 -0
- data/vendor/tests/tests/microformats-v2/h-entry/u-property.html +33 -0
- data/vendor/tests/tests/microformats-v2/h-entry/u-property.json +12 -0
- data/vendor/tests/tests/microformats-v2/h-entry/urlincontent.html +13 -0
- data/vendor/tests/tests/microformats-v2/h-entry/urlincontent.json +14 -0
- data/vendor/tests/tests/microformats-v2/h-event/ampm.html +41 -0
- data/vendor/tests/tests/microformats-v2/h-event/ampm.json +21 -0
- data/vendor/tests/tests/microformats-v2/h-event/attendees.html +13 -0
- data/vendor/tests/tests/microformats-v2/h-event/attendees.json +37 -0
- data/vendor/tests/tests/microformats-v2/h-event/change-log.html +82 -0
- data/vendor/tests/tests/microformats-v2/h-event/combining.html +13 -0
- data/vendor/tests/tests/microformats-v2/h-event/combining.json +25 -0
- data/vendor/tests/tests/microformats-v2/h-event/concatenate.html +8 -0
- data/vendor/tests/tests/microformats-v2/h-event/concatenate.json +12 -0
- data/vendor/tests/tests/microformats-v2/h-event/dates.html +13 -0
- data/vendor/tests/tests/microformats-v2/h-event/dates.json +26 -0
- data/vendor/tests/tests/microformats-v2/h-event/dt-property.html +23 -0
- data/vendor/tests/tests/microformats-v2/h-event/dt-property.json +20 -0
- data/vendor/tests/tests/microformats-v2/h-event/justahyperlink.html +1 -0
- data/vendor/tests/tests/microformats-v2/h-event/justahyperlink.json +11 -0
- data/vendor/tests/tests/microformats-v2/h-event/justaname.html +1 -0
- data/vendor/tests/tests/microformats-v2/h-event/justaname.json +10 -0
- data/vendor/tests/tests/microformats-v2/h-event/time.html +47 -0
- data/vendor/tests/tests/microformats-v2/h-event/time.json +25 -0
- data/vendor/tests/tests/microformats-v2/h-feed/implied-title.html +30 -0
- data/vendor/tests/tests/microformats-v2/h-feed/implied-title.json +23 -0
- data/vendor/tests/tests/microformats-v2/h-feed/simple.html +26 -0
- data/vendor/tests/tests/microformats-v2/h-feed/simple.json +33 -0
- data/vendor/tests/tests/microformats-v2/h-geo/abbrpattern.html +5 -0
- data/vendor/tests/tests/microformats-v2/h-geo/abbrpattern.json +12 -0
- data/vendor/tests/tests/microformats-v2/h-geo/altitude.html +8 -0
- data/vendor/tests/tests/microformats-v2/h-geo/altitude.json +13 -0
- data/vendor/tests/tests/microformats-v2/h-geo/change-log.html +67 -0
- data/vendor/tests/tests/microformats-v2/h-geo/hidden.html +10 -0
- data/vendor/tests/tests/microformats-v2/h-geo/hidden.json +12 -0
- data/vendor/tests/tests/microformats-v2/h-geo/justaname.html +3 -0
- data/vendor/tests/tests/microformats-v2/h-geo/justaname.json +10 -0
- data/vendor/tests/tests/microformats-v2/h-geo/simpleproperties.html +5 -0
- data/vendor/tests/tests/microformats-v2/h-geo/simpleproperties.json +12 -0
- data/vendor/tests/tests/microformats-v2/h-geo/valuetitleclass.html +11 -0
- data/vendor/tests/tests/microformats-v2/h-geo/valuetitleclass.json +12 -0
- data/vendor/tests/tests/microformats-v2/h-news/all.html +35 -0
- data/vendor/tests/tests/microformats-v2/h-news/all.json +51 -0
- data/vendor/tests/tests/microformats-v2/h-news/change-log.html +78 -0
- data/vendor/tests/tests/microformats-v2/h-news/minimum.html +24 -0
- data/vendor/tests/tests/microformats-v2/h-news/minimum.json +40 -0
- data/vendor/tests/tests/microformats-v2/h-org/change-log.html +57 -0
- data/vendor/tests/tests/microformats-v2/h-org/hyperlink.html +1 -0
- data/vendor/tests/tests/microformats-v2/h-org/hyperlink.json +11 -0
- data/vendor/tests/tests/microformats-v2/h-org/simple.html +1 -0
- data/vendor/tests/tests/microformats-v2/h-org/simple.json +10 -0
- data/vendor/tests/tests/microformats-v2/h-org/simpleproperties.html +4 -0
- data/vendor/tests/tests/microformats-v2/h-org/simpleproperties.json +12 -0
- data/vendor/tests/tests/microformats-v2/h-product/aggregate.html +21 -0
- data/vendor/tests/tests/microformats-v2/h-product/aggregate.json +45 -0
- data/vendor/tests/tests/microformats-v2/h-product/change-log.html +62 -0
- data/vendor/tests/tests/microformats-v2/h-product/justahyperlink.html +1 -0
- data/vendor/tests/tests/microformats-v2/h-product/justahyperlink.json +11 -0
- data/vendor/tests/tests/microformats-v2/h-product/justaname.html +1 -0
- data/vendor/tests/tests/microformats-v2/h-product/justaname.json +10 -0
- data/vendor/tests/tests/microformats-v2/h-product/simpleproperties.html +10 -0
- data/vendor/tests/tests/microformats-v2/h-product/simpleproperties.json +26 -0
- data/vendor/tests/tests/microformats-v2/h-recipe/all.html +63 -0
- data/vendor/tests/tests/microformats-v2/h-recipe/all.json +54 -0
- data/vendor/tests/tests/microformats-v2/h-recipe/change-log.html +62 -0
- data/vendor/tests/tests/microformats-v2/h-recipe/minimum.html +7 -0
- data/vendor/tests/tests/microformats-v2/h-recipe/minimum.json +17 -0
- data/vendor/tests/tests/microformats-v2/h-resume/affiliation.html +12 -0
- data/vendor/tests/tests/microformats-v2/h-resume/affiliation.json +20 -0
- data/vendor/tests/tests/microformats-v2/h-resume/change-log.html +78 -0
- data/vendor/tests/tests/microformats-v2/h-resume/contact.html +17 -0
- data/vendor/tests/tests/microformats-v2/h-resume/contact.json +26 -0
- data/vendor/tests/tests/microformats-v2/h-resume/education.html +13 -0
- data/vendor/tests/tests/microformats-v2/h-resume/education.json +30 -0
- data/vendor/tests/tests/microformats-v2/h-resume/justaname.html +1 -0
- data/vendor/tests/tests/microformats-v2/h-resume/justaname.json +10 -0
- data/vendor/tests/tests/microformats-v2/h-resume/skill.html +12 -0
- data/vendor/tests/tests/microformats-v2/h-resume/skill.json +12 -0
- data/vendor/tests/tests/microformats-v2/h-resume/work.html +16 -0
- data/vendor/tests/tests/microformats-v2/h-resume/work.json +31 -0
- data/vendor/tests/tests/microformats-v2/h-review-aggregate/change-log.html +78 -0
- data/vendor/tests/tests/microformats-v2/h-review-aggregate/hevent.html +13 -0
- data/vendor/tests/tests/microformats-v2/h-review-aggregate/hevent.json +23 -0
- data/vendor/tests/tests/microformats-v2/h-review-aggregate/justahyperlink.html +8 -0
- data/vendor/tests/tests/microformats-v2/h-review-aggregate/justahyperlink.json +19 -0
- data/vendor/tests/tests/microformats-v2/h-review-aggregate/simpleproperties.html +18 -0
- data/vendor/tests/tests/microformats-v2/h-review-aggregate/simpleproperties.json +26 -0
- data/vendor/tests/tests/microformats-v2/h-review/change-log.html +84 -0
- data/vendor/tests/tests/microformats-v2/h-review/hyperlink.html +1 -0
- data/vendor/tests/tests/microformats-v2/h-review/hyperlink.json +11 -0
- data/vendor/tests/tests/microformats-v2/h-review/implieditem.html +4 -0
- data/vendor/tests/tests/microformats-v2/h-review/implieditem.json +19 -0
- data/vendor/tests/tests/microformats-v2/h-review/item.html +8 -0
- data/vendor/tests/tests/microformats-v2/h-review/item.json +20 -0
- data/vendor/tests/tests/microformats-v2/h-review/justaname.html +1 -0
- data/vendor/tests/tests/microformats-v2/h-review/justaname.json +10 -0
- data/vendor/tests/tests/microformats-v2/h-review/photo.html +1 -0
- data/vendor/tests/tests/microformats-v2/h-review/photo.json +11 -0
- data/vendor/tests/tests/microformats-v2/h-review/vcard.html +23 -0
- data/vendor/tests/tests/microformats-v2/h-review/vcard.json +48 -0
- data/vendor/tests/tests/microformats-v2/rel/change-log.html +67 -0
- data/vendor/tests/tests/microformats-v2/rel/duplicate-rels.html +10 -0
- data/vendor/tests/tests/microformats-v2/rel/duplicate-rels.json +75 -0
- data/vendor/tests/tests/microformats-v2/rel/license.html +1 -0
- data/vendor/tests/tests/microformats-v2/rel/license.json +12 -0
- data/vendor/tests/tests/microformats-v2/rel/nofollow.html +1 -0
- data/vendor/tests/tests/microformats-v2/rel/nofollow.json +12 -0
- data/vendor/tests/tests/microformats-v2/rel/rel-urls.html +8 -0
- data/vendor/tests/tests/microformats-v2/rel/rel-urls.json +33 -0
- data/vendor/tests/tests/microformats-v2/rel/varying-text-duplicate-rels.html +4 -0
- data/vendor/tests/tests/microformats-v2/rel/varying-text-duplicate-rels.json +20 -0
- data/vendor/tests/tests/microformats-v2/rel/xfn-all.html +19 -0
- data/vendor/tests/tests/microformats-v2/rel/xfn-all.json +92 -0
- data/vendor/tests/tests/microformats-v2/rel/xfn-elsewhere.html +10 -0
- data/vendor/tests/tests/microformats-v2/rel/xfn-elsewhere.json +40 -0
- metadata +533 -57
- data/CHANGELOG.rdoc +0 -6
- data/LICENSE +0 -20
- data/README.rdoc +0 -147
- data/lib/address.rb +0 -97
- data/lib/calendar.rb +0 -40
- data/lib/event.rb +0 -156
- data/lib/formatting_helpers.rb +0 -57
- data/lib/helpers.rb +0 -74
- data/lib/vcard.rb +0 -177
@@ -0,0 +1,105 @@
|
|
1
|
+
module Microformats
|
2
|
+
|
3
|
+
class Parser < ParserCore
|
4
|
+
attr_reader :http_headers, :http_body
|
5
|
+
# Builds a parser: runs the parent class initializer and starts with an
# empty hash of HTTP response headers.
def initialize
  super()
  @http_headers = {}
end
|
9
|
+
|
10
|
+
# Parses +html+ and returns the microformats data found in it.
#
# html    - whatever read_html accepts (markup, or something it can open).
# base    - optional base URL for resolving relative URLs; replaced by the
#           value parse_base finds in the document, when there is one.
# headers - passed through unchanged to read_html.
#
# Returns a Collection built from the 'items', 'rels' and 'rel-urls' gathered
# during this call.
def parse(html, base: nil, headers: {})
  # Reset everything collected by a previous call.
  @http_headers = {}
  @items = []
  @rels = {}
  @rel_urls = {}
  @alternates = []
  @base = base

  document = Nokogiri::HTML(read_html(html, headers: headers))

  document_base = parse_base(document)
  @base = document_base unless document_base.nil?

  # Rewrite URL attributes in place so later steps see absolute URLs.
  # src wins: a node that has both src and href only gets its src rewritten.
  document.traverse do |node|
    url_attribute = node.attribute('src')
    url_attribute = node.attribute('href') if url_attribute.nil?
    next if url_attribute.nil?

    url_attribute.value = Microformats::AbsoluteUri.new(url_attribute.value.to_s, base: @base).absolutize
  end

  parse_node(document)
  parse_rels(document)

  Collection.new({ 'items' => @items, 'rels' => @rels, 'rel-urls' => @rel_urls })
end
|
43
|
+
|
44
|
+
# Resolves +html+ to a string of markup.
#
# +html+ is first handed to +open+; if that succeeds the opened resource is
# read into @http_body, and @http_headers is taken from the response when it
# exposes +meta+. If nothing by that name can be opened (ENOENT or
# ENAMETOOLONG), +html+ is assumed to already be markup and is used as-is.
#
# html    - markup, or something +open+ can open.
# headers - options hash forwarded to +open+ when it is not empty.
#
# Returns the markup (also stored in @http_body).
def read_html(html, headers: {})
  # Kernel#open spawns a subprocess for input beginning with "|". Such input
  # must never be executed, so it is treated as literal markup.
  return @http_body = html if html.is_a?(String) && html.start_with?('|')

  # Only forward headers when there are some: a positional empty Hash is
  # taken as the mode argument by newer Rubies and makes open raise TypeError.
  open_args = headers.nil? || headers.empty? ? [html] : [html, headers]

  open(*open_args) do |response|
    @http_headers = response.meta if response.respond_to?(:meta)
    @http_body = response.read
  end
  @http_body
rescue Errno::ENOENT, Errno::ENAMETOOLONG
  @http_body = html
end
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
# Parses one element: if it carries format classes, a FormatParser result is
# appended to @items; otherwise its children are searched.
#
# Backcompat format classes that duplicate a class already returned by
# format_classes are ignored. When any backcompat class remains, the element
# is parsed in backcompat mode with both sets of classes.
def parse_element(element)
  v2_roots = format_classes(element)
  classic_roots = backcompat_format_classes(element) - v2_roots

  if classic_roots.any?
    all_roots = v2_roots + classic_roots
    @items << FormatParser.new.parse(element, base: @base, format_class_array: all_roots, backcompat: true)
  elsif v2_roots.any?
    @items << FormatParser.new.parse(element, base: @base, format_class_array: v2_roots)
  else
    parse_nodeset(element.children)
  end
end
|
72
|
+
|
73
|
+
# Finds the document's base URL.
#
# Uses the first <base> element that has an href attribute and reads that
# attribute by name, so other attributes (e.g. target) that appear before
# href are never mistaken for the URL.
#
# document - object responding to search('base') with a list of elements
#            that support ['href'] lookup.
#
# Returns the href with surrounding whitespace removed, or nil when there is
# no <base href> or the href is blank.
def parse_base(document)
  base = document.search('base').find { |node| !node['href'].nil? }
  return nil if base.nil?

  href = base['href'].to_s.strip
  href.empty? ? nil : href
end
|
77
|
+
|
78
|
+
# Collects rel information from every element under +element+ that has both
# a rel and an href attribute.
#
# Fills two instance hashes:
#   @rels     - rel value => list of unique absolute URLs using that value.
#   @rel_urls - absolute URL => hash with 'hreflang', 'media', 'title',
#               'type', 'text' (first value seen for each) and 'rels'.
#
# 'rels' accumulates the rel values of every link pointing at the URL, in
# first-seen order and without duplicates, rather than keeping only those of
# the last link.
def parse_rels(element)
  element.search('*[@rel]').each do |link|
    href = link.attribute('href')
    next if href.nil?

    # Absolutize once per link; every rel value of the link shares this URL.
    url = Microformats::AbsoluteUri.new(href.text, base: @base).absolutize
    rel_values = link.attribute('rel').text.split(' ').uniq

    rel_values.each do |rel_value|
      @rels[rel_value] ||= []
      @rels[rel_value] << url unless @rels[rel_value].include?(url)
    end

    details = (@rel_urls[url] ||= {})

    # The first link that supplies an attribute wins; later links never
    # overwrite it.
    %w[hreflang media title type].each do |name|
      attribute = link.attribute(name)
      details[name] = attribute.value if details[name].nil? && !attribute.nil?
    end
    details['text'] = link.text.strip if details['text'].nil? && !link.text.empty?

    details['rels'] = (details['rels'] || []) | rel_values
  end
end
|
102
|
+
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
@@ -0,0 +1,343 @@
|
|
1
|
+
module Microformats
|
2
|
+
class ParserCore
|
3
|
+
|
4
|
+
# Patterns applied to individual class names (one whitespace-separated token
# of a class attribute at a time).

# Exactly the class name "value".
VALUE_CLASS_REG_EXP       = /^value$/
# Exactly the class name "value-title".
VALUE_TITLE_CLASS_REG_EXP = /^value-title$/
# Format class names: "h-" followed by lowercase letters and hyphens.
FORMAT_CLASS_REG_EXP      = /^h-[a-z-]+$/
# Property class names: a p-, u-, dt- or e- prefix followed by lowercase
# letters and hyphens.
PROPERTY_CLASS_REG_EXP    = /^(p-|u-|dt-|e-)[a-z-]+$/
|
8
|
+
|
9
|
+
|
10
|
+
# Starts with an empty list of format classes and backcompat mode off.
def initialize
  @fmt_classes = []
  @mode_backcompat = false
end
|
14
|
+
|
15
|
+
private
|
16
|
+
|
17
|
+
# Dispatches on the kind of Nokogiri object given:
#   document -> its children are parsed,
#   node set -> parse_nodeset,
#   element  -> parse_element.
# Anything else (text, comments, ...) is ignored and nil is returned.
def parse_node(node)
  case node
  when Nokogiri::HTML::Document then parse_node(node.children)
  when Nokogiri::XML::NodeSet   then parse_nodeset(node)
  when Nokogiri::XML::Element   then parse_element(node)
  end
end
|
28
|
+
|
29
|
+
|
30
|
+
# Runs parse_node on every member of +nodeset+, in order.
def parse_nodeset(nodeset)
  nodeset.each { |child| parse_node(child) }
end
|
35
|
+
|
36
|
+
# Returns the element's class names that match FORMAT_CLASS_REG_EXP, in the
# order they appear in the class attribute (empty when there is none).
def format_classes(element)
  class_names = element.attribute('class').to_s.split
  class_names.grep(FORMAT_CLASS_REG_EXP)
end
|
41
|
+
|
42
|
+
# Maps the element's classic (pre-"h-") class names to the equivalent "h-"
# format class names.
#
# Each class name is handled in attribute order:
#   * a classic root name (adr, geo, hentry, vcard, ...) maps directly;
#   * otherwise, a name may imply a nested format depending on which formats
#     are listed in @fmt_classes (e.g. "author" while @fmt_classes includes
#     'h-entry' implies 'h-card').
#
# Returns the resulting names without duplicates, first occurrence first.
def backcompat_format_classes(element)
  # Classic root class name => format class. The last two entries are not
  # specified by the backcompat FAQ but are parsed anyway.
  root_patterns = [
    [/^adr$/,                     'h-adr'],
    [/^geo$/,                     'h-geo'],
    [/^h[eE]ntry$/,               'h-entry'],
    [/^h[pP]roduct$/,             'h-product'],
    [/^h[rR]ecipe$/,              'h-recipe'],
    [/^h[rR]esume$/,              'h-resume'],
    [/^h[rR]eview$/,              'h-review'],
    [/^h[rR]eview-aggregate$/,    'h-review-aggregate'],
    [/^[vh][eE]vent$/,            'h-event'],
    [/^[vh][cC]ard$/,             'h-card'],
    [/^h[fF]eed$/,                'h-feed'],
    [/^h[nN]ews$/,                'h-news']
  ]

  # [format required in @fmt_classes, class name, implied format classes].
  # Order matters: it decides the order of the returned names.
  nested_rules = [
    ['h-entry',            'author',      ['h-card']],
    ['h-product',          'review',      ['h-review']],
    ['h-recipe',           'author',      ['h-card']],
    ['h-resume',           'contact',     ['h-card']],
    ['h-resume',           'education',   ['h-event']],
    ['h-resume',           'experience',  ['h-event']],
    ['h-resume',           'affiliation', ['h-card']],
    ['h-review',           'reviewer',    ['h-card']],
    ['h-review',           'item',        ['h-item']],
    ['h-review-aggregate', 'item',        ['h-item']],
    ['h-review-aggregate', 'reviewer',    ['h-card']],
    ['h-entry',            'location',    ['h-adr', 'h-card']],
    ['h-feed',             'author',      ['h-card']]
  ]

  # An "item" that is itself one of these is not given the generic h-item.
  typed_item_classes = ['vcard', 'vevent', 'hproduct']

  html_classes = element.attribute('class').to_s.split

  found = html_classes.each_with_object([]) do |html_class, names|
    root = root_patterns.find { |pattern, _format| html_class =~ pattern }
    if root
      names << root.last
      next
    end

    nested_rules.each do |required_format, class_name, implied|
      next unless @fmt_classes.include?(required_format) && html_class == class_name
      next if class_name == 'item' && (html_classes & typed_item_classes).any?

      names.concat(implied)
    end
  end

  found.uniq
end
|
126
|
+
|
127
|
+
# Lists the class tokens of +element+ that look like property classes.
#
# The element's `class` attribute is split on whitespace (a missing
# attribute yields no tokens) and only the tokens matching
# PROPERTY_CLASS_REG_EXP are kept, in document order.
def property_classes(element)
  class_tokens = element.attribute('class').to_s.split
  class_tokens.grep(PROPERTY_CLASS_REG_EXP)
end
|
132
|
+
|
133
|
+
# Ordered rel-based backcompat rules.
# Each entry is [format class, rel values that must ALL be present, property].
# The order is significant: it determines the order of the returned list.
BACKCOMPAT_REL_PROPERTY_RULES = [
  ['h-entry',  %w[bookmark],      'u-url'],
  ['h-entry',  %w[tag],           'p-category'],
  ['h-recipe', %w[tag],           'p-category'],
  ['h-review', %w[tag],           'p-category'],
  ['h-feed',   %w[tag],           'p-category'],
  ['h-review', %w[self bookmark], 'u-url'],
  ['h-news',   %w[principles],    'u-principles']
].freeze

#TODO PROPOSED convert time.entry-date[datetime] to dt-published see wiki/h-entry
#TODO PROPOSED convert rel=author to u-author see wiki/h-entry

# Ordered class-based backcompat rules, one lookup table per format class.
# Declared as [format, { legacy class => property }, { prefix => [legacy classes] }];
# a prefixed class maps to prefix + class (e.g. 'locality' => 'p-locality').
# The declarations are expanded once, at load time, into [format, { class => property }].
BACKCOMPAT_CLASS_PROPERTY_RULES = [
  ['h-adr', {},
   { 'p-' => %w[post-office-box extended-address street-address locality region postal-code country-name] }],

  ['h-geo', {},
   { 'p-' => %w[longitude latitude] }],

  ['h-entry',
   { 'entry-title' => 'p-name', 'entry-summary' => 'p-summary', 'entry-content' => 'e-content' },
   { 'dt-' => %w[updated published], 'p-' => %w[category author] }],

  #h-news isn't even listed in backcompat list, adding to follow test suite
  ['h-news', {},
   { 'p-' => %w[source-org entry dateline geo] }],

  ['h-feed',
   { 'fn' => 'p-name' },
   { 'p-' => %w[author summary], 'u-' => %w[photo url] }],

  ['h-item',
   { 'fn' => 'p-name' },
   { 'u-' => %w[photo url] }],

  ['h-product',
   { 'fn' => 'p-name' },
   { 'u-' => %w[photo url identifier], 'p-' => %w[brand category price review description] }],

  ['h-recipe',
   { 'fn' => 'p-name', 'instructions' => 'e-instructions', 'duration' => 'dt-duration', 'photo' => 'u-photo' },
   { 'p-' => %w[ingredient category yield summary nutrition author] }],

  ['h-resume', {},
   { 'p-' => %w[skill summary contact education experience affiliation] }],

  ['h-review',
   { 'summary' => 'p-name', 'fn' => 'p-name', 'reviewer' => 'p-author',
     'dtreviewed' => 'dt-published', 'description' => 'e-content' },
   { 'u-' => %w[photo url identifier], 'p-' => %w[rating best worst item] }],

  ['h-review-aggregate',
   { 'summary' => 'p-name', 'fn' => 'p-name', 'reviewer' => 'p-author',
     'dtreviewed' => 'dt-published', 'description' => 'e-content' },
   { 'u-' => %w[photo url identifier], 'p-' => %w[rating best worst item count votes average] }],

  ['h-event',
   { 'summary' => 'p-name', 'dtstart' => 'dt-start', 'dtend' => 'dt-end',
     'duration' => 'dt-duration', 'geo' => 'p-location' },
   { 'u-' => %w[url], 'p-' => %w[description category location attendee] }],

  # 'sound', 'agent', 'mailer', 'sort-string' and 'class' aren't listed in
  # the wiki, may be removed
  ['h-card',
   { 'fn' => 'p-name', 'bday' => 'dt-bday', 'title' => 'p-job-title', 'rev' => 'dt-rev' },
   { 'u-' => %w[email logo photo url uid key sound],
     'p-' => %w[honorific-prefix given-name additional-name family-name honorific-suffix nickname
                category adr extended-address street-address locality region postal-code country-name
                label geo latitude longitude tel note org organization-name organization-unit role tz
                agent mailer sort-string class] }]
].map { |format, renamed, prefixed|
  table = {}
  prefixed.each do |prefix, names|
    names.each { |name| table[name] = prefix + name }
  end
  # explicit renames win over prefixed entries should the two ever overlap
  [format, table.merge(renamed).freeze]
}.freeze

# Translates the legacy `rel` values and class names on +element+ into
# prefixed property names, for the formats listed in @fmt_classes.
#
# rel-derived properties come first (in rule order), followed by
# class-derived properties in the order the class tokens appear on the
# element; for each token the formats are consulted in table order.
#
# @param element [#attribute] node whose 'rel' and 'class' attributes are read
# @return [Array<String>] de-duplicated property names (fresh String objects)
def backcompat_property_classes(element)
  rels = element.attribute('rel').to_s.split
  html_classes = element.attribute('class').to_s.split

  result_set = []

  BACKCOMPAT_REL_PROPERTY_RULES.each do |format, required_rels, property|
    next unless @fmt_classes.include?(format)
    result_set << property if required_rels.all? { |rel| rels.include?(rel) }
  end

  # Work out once which format tables apply instead of re-checking
  # @fmt_classes for every class token.
  active_tables = BACKCOMPAT_CLASS_PROPERTY_RULES.select do |format, _table|
    @fmt_classes.include?(format)
  end

  html_classes.each do |html_class|
    active_tables.each do |_format, table|
      property = table[html_class]
      result_set << property unless property.nil?
    end
  end

  # Hand back copies so callers never hold (or mutate) the table's own strings.
  result_set.uniq.map(&:dup)
end
|
328
|
+
|
329
|
+
# Lists the class tokens of +element+ that match VALUE_CLASS_REG_EXP.
#
# A missing `class` attribute is treated as an empty token list.
def value_classes(element)
  class_tokens = element.attribute('class').to_s.split
  class_tokens.grep(VALUE_CLASS_REG_EXP)
end
|
334
|
+
# Lists the class tokens of +element+ that match VALUE_TITLE_CLASS_REG_EXP.
#
# A missing `class` attribute is treated as an empty token list.
def value_title_classes(element)
  class_tokens = element.attribute('class').to_s.split
  class_tokens.grep(VALUE_TITLE_CLASS_REG_EXP)
end
|
339
|
+
|
340
|
+
end
|
341
|
+
|
342
|
+
end
|
343
|
+
|
@@ -0,0 +1,122 @@
|
|
1
|
+
module Microformats
  # Extracts the value of a single property from an element.
  #
  # The property type ('p', 'e', 'u' or 'dt') selects the extraction rules;
  # 'dt' is delegated to Microformats::TimePropertyParser.
  class PropertyParser < ParserCore

    # tag name => attribute that carries the value of a 'p' property
    P_VALUE_ATTRIBUTES = {
      'abbr' => 'title', 'data' => 'value', 'input' => 'value', 'img' => 'alt', 'area' => 'alt'
    }.freeze

    # tag name => attribute used by a 'u' property when no URL attribute
    # and no value-class-pattern value was found
    U_FALLBACK_VALUE_ATTRIBUTES = { 'abbr' => 'title', 'data' => 'value', 'input' => 'value' }.freeze

    # tag name => URL-bearing attributes, tried in order ('video' falls back
    # from src to poster)
    U_URL_ATTRIBUTES = {
      'a' => %w[href], 'area' => %w[href],
      'img' => %w[src], 'audio' => %w[src], 'video' => %w[src poster], 'source' => %w[src],
      'object' => %w[data]
    }.freeze

    # tag name => attribute read from an element carrying a value class
    VALUE_CLASS_VALUE_ATTRIBUTES = {
      'img' => 'alt', 'area' => 'alt', 'data' => 'value', 'abbr' => 'title'
    }.freeze

    # Parses +element+ as a property of type +element_type+.
    #
    # @param element the node to read (a Nokogiri element in practice)
    # @param base base URL used to absolutize relative URLs
    # @param element_type [String] 'p', 'e', 'u' or 'dt'
    # @param format_class_array [Array<String>] format classes currently being
    #   parsed; consulted by the backcompat class mapping
    # @param backcompat truthy to use backcompat property class detection
    # @return a String for 'p'/'u', a Hash with :value and :html for 'e',
    #   whatever TimePropertyParser#parse returns for 'dt', and nil for any
    #   other +element_type+
    def parse(element, base: nil, element_type:, format_class_array: [], backcompat: nil)
      @base = base
      @value = nil
      @property_type = element_type

      @fmt_classes = format_class_array
      @mode_backcompat = backcompat

      case element_type
      when 'p'
        parse_value_class_pattern(element)

        if @value.nil?
          @value = implied_attribute_value(element, P_VALUE_ATTRIBUTES)
          @value = render_text_and_replace_images(element, base: @base) if @value.nil?
        end

      when 'e'
        @value = {
          value: render_text(element, base: @base), #TODO the spec doesn't say to remove script and style tags, assuming this to be in error
          html: element.inner_html.gsub(/\A +/, '').gsub(/ +\Z/, '')
        }

      when 'u'
        url = url_attribute_value(element)

        if url.nil?
          # No URL attribute: fall back to the value class pattern, then to
          # abbr/data/input attributes, then to the text. These fallbacks are
          # returned as-is (not absolutized), as before.
          parse_value_class_pattern(element)

          if @value.nil?
            @value = implied_attribute_value(element, U_FALLBACK_VALUE_ATTRIBUTES)
            @value = render_text(element, base: @base) if @value.nil?
          end
        else
          @value = Microformats::AbsoluteUri.new(url, base: @base).absolutize
        end

      when 'dt'
        @value = Microformats::TimePropertyParser.new.parse(element, base: base, element_type: element_type, format_class_array: format_class_array, backcompat: backcompat)
      end

      @value
    end

    # Walks the children of +element+ collecting value-class-pattern pieces
    # (see #parse_element) and, when any were found, stores their
    # concatenation in @value. @value is left untouched otherwise.
    def parse_value_class_pattern(element)
      @value_class_pattern_value = []
      parse_node(element.children)
      @value = @value_class_pattern_value.join unless @value_class_pattern_value.empty?
    end

    # Per-element hook used while walking for the value class pattern.
    #
    # * an element with a value-title class contributes its `title` attribute;
    # * an element with a value class contributes its alt/value/title
    #   attribute (depending on the tag) or, failing that, its stripped text;
    # * any other element is descended into only when it carries neither
    #   property classes nor format classes.
    def parse_element(element)
      if value_title_classes(element).length >= 1
        # A value-title element without a title attribute used to raise
        # NoMethodError on nil; it now simply contributes nothing.
        title = stripped_attribute_value(element, 'title')
        @value_class_pattern_value << title unless title.nil?

      elsif value_classes(element).length >= 1
        piece = implied_attribute_value(element, VALUE_CLASS_VALUE_ATTRIBUTES)
        @value_class_pattern_value << (piece.nil? ? element.text.strip : piece)

      else
        p_classes = @mode_backcompat ? backcompat_property_classes(element) : property_classes(element)
        if p_classes.empty? && format_classes(element).empty?
          parse_node(element.children)
        end
      end
    end

    # Renders the text of +node+ with <script>/<style> removed and every
    # <img> replaced by its alt text or, when it has no alt, by its
    # absolutized src (each padded with a space on both sides).
    #
    # @param node node whose inner HTML is rendered
    # @param base base URL for absolutizing image sources; falls back to
    #   @base when nil (previously the argument was accepted but ignored)
    # @return [String] the stripped text
    def render_text_and_replace_images(node, base: nil)
      effective_base = base || @base
      new_doc = text_only_document(node)

      new_doc.traverse do |child|
        next unless child.name == 'img'

        if !child.attribute('alt').nil?
          child.replace(' ' + child.attribute('alt').value.to_s + ' ')
        elsif !child.attribute('src').nil?
          absolute_url = Microformats::AbsoluteUri.new(child.attribute('src').value.to_s, base: effective_base).absolutize
          child.replace(' ' + absolute_url + ' ')
        end
      end

      new_doc.text.strip
    end

    # Renders the text of +node+ with <script>/<style> removed.
    #
    # @param node node whose inner HTML is rendered
    # @param base accepted for signature parity with
    #   #render_text_and_replace_images; not used here
    # @return [String] the stripped text
    def render_text(node, base: nil)
      text_only_document(node).text.strip
    end

    private

    # Re-parses the inner HTML of +node+ into a standalone document and drops
    # all <script> and <style> elements from it.
    def text_only_document(node)
      document = Nokogiri::HTML(node.inner_html)
      document.xpath('//script').remove
      document.xpath('//style').remove
      document
    end

    # Returns the stripped value of attribute +name+, or nil when absent.
    def stripped_attribute_value(element, name)
      attribute = element.attribute(name)
      attribute.value.strip unless attribute.nil?
    end

    # Looks up the attribute mapped to the element's tag in
    # +attribute_for_tag+ and returns its stripped value; nil when the tag is
    # not in the map or the attribute is absent.
    def implied_attribute_value(element, attribute_for_tag)
      attribute_name = attribute_for_tag[element.name]
      stripped_attribute_value(element, attribute_name) unless attribute_name.nil?
    end

    # Returns the first present URL attribute for the element's tag
    # (stripped), or nil when the tag has none.
    def url_attribute_value(element)
      U_URL_ATTRIBUTES.fetch(element.name, []).each do |name|
        value = stripped_attribute_value(element, name)
        return value unless value.nil?
      end
      nil
    end

  end
end
|