RubyGems - fblee-feedzirra - Versions diffs - 0.0.17 - Mend

fblee-feedzirra 0.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

data/README.textile +196 -0
data/Rakefile +56 -0
data/lib/core_ext/date.rb +21 -0
data/lib/core_ext/string.rb +9 -0
data/lib/feedzirra/feed.rb +357 -0
data/lib/feedzirra/feed_entry_utilities.rb +81 -0
data/lib/feedzirra/feed_utilities.rb +71 -0
data/lib/feedzirra/parser/atom.rb +36 -0
data/lib/feedzirra/parser/atom_entry.rb +41 -0
data/lib/feedzirra/parser/atom_feed_burner.rb +28 -0
data/lib/feedzirra/parser/atom_feed_burner_entry.rb +37 -0
data/lib/feedzirra/parser/itunes_rss.rb +50 -0
data/lib/feedzirra/parser/itunes_rss_item.rb +31 -0
data/lib/feedzirra/parser/itunes_rss_owner.rb +12 -0
data/lib/feedzirra/parser/rss.rb +36 -0
data/lib/feedzirra/parser/rss_entry.rb +45 -0
data/lib/feedzirra/web_page.rb +8 -0
data/lib/feedzirra.rb +35 -0
data/spec/feedzirra/feed_entry_utilities_spec.rb +62 -0
data/spec/feedzirra/feed_spec.rb +595 -0
data/spec/feedzirra/feed_utilities_spec.rb +149 -0
data/spec/feedzirra/parser/atom_entry_spec.rb +49 -0
data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +42 -0
data/spec/feedzirra/parser/atom_feed_burner_spec.rb +39 -0
data/spec/feedzirra/parser/atom_spec.rb +43 -0
data/spec/feedzirra/parser/itunes_rss_item_spec.rb +48 -0
data/spec/feedzirra/parser/itunes_rss_owner_spec.rb +18 -0
data/spec/feedzirra/parser/itunes_rss_spec.rb +50 -0
data/spec/feedzirra/parser/rss_entry_spec.rb +41 -0
data/spec/feedzirra/parser/rss_spec.rb +45 -0
data/spec/spec.opts +2 -0
data/spec/spec_helper.rb +70 -0
metadata +177 -0

data/spec/feedzirra/feed_utilities_spec.rb ADDED Viewed

@@ -0,0 +1,149 @@
+require File.dirname(__FILE__) + '/../spec_helper'
+# Specs for Feedzirra::FeedUtilities: the accessors the module adds to a feed
+# object, and how #update_from_feed applies another feed object's attributes
+# and entries to the receiver.
+describe Feedzirra::FeedUtilities do
+  before(:each) do
+    # Anonymous class that only mixes in the module, so the accessor examples
+    # exercise FeedUtilities without any parser behaviour.
+    @klass = Class.new do
+      include Feedzirra::FeedUtilities
+    end
+  end
+  describe "instance methods" do
+    it "should provide an updated? accessor" do
+      feed = @klass.new
+      feed.should_not be_updated
+      feed.updated = true
+      feed.should be_updated
+    end
+    it "should provide a new_entries accessor" do
+      feed = @klass.new
+      feed.new_entries.should == []
+      feed.new_entries = [:foo]
+      feed.new_entries.should == [:foo]
+    end
+    it "should provide an etag accessor" do
+      feed = @klass.new
+      feed.etag = "foo"
+      feed.etag.should == "foo"
+    end
+    it "should provide a last_modified accessor" do
+      feed = @klass.new
+      time = Time.now
+      feed.last_modified = time
+      feed.last_modified.should == time
+    end
+    it "should return new_entries? as true when entries are put into new_entries" do
+      feed = @klass.new
+      feed.new_entries << :foo
+      feed.should have_new_entries
+    end
+    # The next two examples use a real parser class instead of @klass,
+    # presumably because they need an entries collection (TODO confirm).
+    it "should return a last_modified value from the entry with the most recent published date if the last_modified date hasn't been set" do
+      feed = Feedzirra::Parser::Atom.new
+      entry =Feedzirra::Parser::AtomEntry.new
+      entry.published = Time.now.to_s
+      feed.entries << entry
+      feed.last_modified.should == entry.published
+    end
+    it "should not throw an error if one of the entries has published date of nil" do
+      feed = Feedzirra::Parser::Atom.new
+      entry = Feedzirra::Parser::AtomEntry.new
+      entry.published = Time.now.to_s
+      feed.entries << entry
+      # Second entry is left without a published date on purpose.
+      feed.entries << Feedzirra::Parser::AtomEntry.new
+      feed.last_modified.should == entry.published
+    end
+  end
+  describe "#update_from_feed" do
+    describe "updating feed attributes" do
+      before(:each) do
+        # Atom is used here as a stand-in feed class; the author notes that a
+        # different class would be the right one to use. update_from_feed is
+        # only ever called on a feed object, so the stand-in is good enough.
+        @feed = Feedzirra::Parser::Atom.new
+        @feed.title    = "A title"
+        @feed.url      = "http://pauldix.net"
+        @feed.feed_url = "http://feeds.feedburner.com/PaulDixExplainsNothing"
+        @feed.updated  = false
+        # Start from a copy so each example changes only the attribute under test.
+        @updated_feed = @feed.dup
+      end
+      it "should update the title if changed" do
+        @updated_feed.title = "new title"
+        @feed.update_from_feed(@updated_feed)
+        @feed.title.should == @updated_feed.title
+        @feed.should be_updated
+      end
+      it "should not update the title if the same" do
+        @feed.update_from_feed(@updated_feed)
+        @feed.should_not be_updated
+      end
+      it "should update the feed_url if changed" do
+        @updated_feed.feed_url = "a new feed url"
+        @feed.update_from_feed(@updated_feed)
+        @feed.feed_url.should == @updated_feed.feed_url
+        @feed.should be_updated
+      end
+      it "should not update the feed_url if the same" do
+        @feed.update_from_feed(@updated_feed)
+        @feed.should_not be_updated
+      end
+      # NOTE(review): unlike the title and feed_url examples, this one does not
+      # assert `@feed.should be_updated` - confirm whether that is intentional.
+      it "should update the url if changed" do
+        @updated_feed.url = "a new url"
+        @feed.update_from_feed(@updated_feed)
+        @feed.url.should == @updated_feed.url
+      end
+      it "should not update the url if not changed" do
+        @feed.update_from_feed(@updated_feed)
+        @feed.should_not be_updated
+      end
+    end
+    describe "updating entries" do
+      before(:each) do
+        # Atom is used here as a stand-in feed class; the author notes that a
+        # different class would be the right one to use. update_from_feed is
+        # only ever called on a feed object, so the stand-in is good enough.
+        @feed = Feedzirra::Parser::Atom.new
+        @feed.title    = "A title"
+        @feed.url      = "http://pauldix.net"
+        @feed.feed_url = "http://feeds.feedburner.com/PaulDixExplainsNothing"
+        @feed.updated  = false
+        # NOTE(review): dup is a shallow copy, and it is taken before entries is
+        # touched on either object. The examples below expect @feed and
+        # @updated_feed to hold separate entries collections - confirm that
+        # holds before reordering this setup.
+        @updated_feed = @feed.dup
+        @old_entry = Feedzirra::Parser::AtomEntry.new
+        @old_entry.url = "http://pauldix.net/old.html"
+        @old_entry.published = Time.now.to_s
+        @new_entry = Feedzirra::Parser::AtomEntry.new
+        @new_entry.url = "http://pauldix.net/new.html"
+        # Ten seconds later than the old entry, so it is the most recent one.
+        @new_entry.published = (Time.now + 10).to_s
+        # The existing feed knows only the old entry; the incoming feed has both.
+        @feed.entries << @old_entry
+        @updated_feed.entries << @new_entry
+        @updated_feed.entries << @old_entry
+      end
+      it "should update last-modified from the latest entry date" do
+        @feed.update_from_feed(@updated_feed)
+        @feed.last_modified.should == @new_entry.published
+      end
+      it "should put new entries into new_entries" do
+        @feed.update_from_feed(@updated_feed)
+        @feed.new_entries.should == [@new_entry]
+      end
+      it "should also put new entries into the entries collection" do
+        @feed.update_from_feed(@updated_feed)
+        @feed.entries.should include(@new_entry)
+        @feed.entries.should include(@old_entry)
+      end
+    end
+  end
+end

data/spec/feedzirra/parser/atom_entry_spec.rb ADDED Viewed

@@ -0,0 +1,49 @@
+require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
+# Specs for the fields Feedzirra::Parser::AtomEntry exposes for the first entry
+# of the sample Atom feed.
+describe Feedzirra::Parser::AtomEntry do
+  before(:each) do
+    # The entry is obtained by parsing a whole feed rather than by handing entry
+    # XML straight to AtomEntry. Strictly, these unit tests should rely on
+    # AtomEntry alone, but going through Atom is how entries are produced in
+    # real use.
+    @entry = Feedzirra::Parser::Atom.parse(sample_atom_feed).entries.first
+  end
+  it "should parse the title" do
+    @entry.title.should == "AWS Job: Architect & Designer Position in Turkey"
+  end
+  it "should parse the url" do
+    @entry.url.should == "http://aws.typepad.com/aws/2009/01/aws-job-architect-designer-position-in-turkey.html"
+  end
+  # Uses a fixture whose link tags carry no type attribute.
+  it "should parse the url even when it doesn't have the type='text/html' attribute" do
+    Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).entries.first.url.should == "http://www.innoq.com/blog/phaus/2009/07/ja.html"
+  end
+  it "should parse the author" do
+    @entry.author.should == "AWS Editor"
+  end
+  it "should parse the content" do
+    @entry.content.should == sample_atom_entry_content
+  end
+  it "should provide a summary" do
+    @entry.summary.should == "Late last year an entrepreneur from Turkey visited me at Amazon HQ in Seattle. We talked about his plans to use AWS as part of his new social video portal startup. I won't spill any beans before he's ready to..."
+  end
+  it "should parse the published date" do
+    @entry.published.to_s.should == "Fri Jan 16 18:21:00 UTC 2009"
+  end
+  it "should parse the categories" do
+    @entry.categories.should == ['Turkey', 'Seattle']
+  end
+  it "should parse the updated date" do
+    @entry.updated.to_s.should == "Fri Jan 16 18:21:00 UTC 2009"
+  end
+  it "should parse the id" do
+    @entry.id.should == "tag:typepad.com,2003:post-61484736"
+  end
+end

data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb ADDED Viewed

@@ -0,0 +1,42 @@
+require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
+# Specs for the fields Feedzirra::Parser::AtomFeedBurnerEntry exposes for the
+# first entry of the sample FeedBurner Atom feed.
+describe Feedzirra::Parser::AtomFeedBurnerEntry do
+  before(:each) do
+    # The entry is obtained by parsing a whole feed rather than by handing entry
+    # XML straight to AtomFeedBurnerEntry. Strictly, these unit tests should
+    # rely on AtomFeedBurnerEntry alone, but going through AtomFeedBurner is how
+    # entries are produced in real use.
+    @entry = Feedzirra::Parser::AtomFeedBurner.parse(sample_feedburner_atom_feed).entries.first
+  end
+  it "should parse the title" do
+    @entry.title.should == "Making a Ruby C library even faster"
+  end
+  it "should be able to fetch a url via the 'alternate' rel if no origLink exists" do
+    # load_sample resolves fixtures from the shared sample_feeds directory, the
+    # same location the hand-built relative path used to point at.
+    entry = Feedzirra::Parser::AtomFeedBurner.parse(load_sample("PaulDixExplainsNothingAlternate.xml")).entries.first
+    entry.url.should == 'http://feeds.feedburner.com/~r/PaulDixExplainsNothing/~3/519925023/making-a-ruby-c-library-even-faster.html'
+  end
+  it "should parse the url" do
+    @entry.url.should == "http://www.pauldix.net/2009/01/making-a-ruby-c-library-even-faster.html"
+  end
+  it "should parse the author" do
+    @entry.author.should == "Paul Dix"
+  end
+  it "should parse the content" do
+    @entry.content.should == sample_feedburner_atom_entry_content
+  end
+  it "should provide a summary" do
+    @entry.summary.should == "Last week I released the first version of a SAX based XML parsing library called SAX-Machine. It uses Nokogiri, which uses libxml, so it's pretty fast. However, I felt that it could be even faster. The only question was how..."
+  end
+  it "should parse the published date" do
+    @entry.published.to_s.should == "Thu Jan 22 15:50:22 UTC 2009"
+  end
+  it "should parse the categories" do
+    @entry.categories.should == ['Ruby', 'Another Category']
+  end
+end

data/spec/feedzirra/parser/atom_feed_burner_spec.rb ADDED Viewed

@@ -0,0 +1,39 @@
+require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
+# Specs for Feedzirra::Parser::AtomFeedBurner: format detection and the
+# feed-level fields parsed from the sample FeedBurner Atom feed.
+describe Feedzirra::Parser::AtomFeedBurner do
+  # be_able_to_parse is RSpec's predicate matcher for able_to_parse?, called
+  # here on the parser class itself, so the group is labelled accordingly.
+  describe ".able_to_parse?" do
+    it "should return true for a feedburner atom feed" do
+      Feedzirra::Parser::AtomFeedBurner.should be_able_to_parse(sample_feedburner_atom_feed)
+    end
+    it "should return false for an rdf feed" do
+      Feedzirra::Parser::AtomFeedBurner.should_not be_able_to_parse(sample_rdf_feed)
+    end
+    it "should return false for a regular atom feed" do
+      Feedzirra::Parser::AtomFeedBurner.should_not be_able_to_parse(sample_atom_feed)
+    end
+  end
+  describe "parsing" do
+    before(:each) do
+      # Parse the fixture afresh for every example.
+      @feed = Feedzirra::Parser::AtomFeedBurner.parse(sample_feedburner_atom_feed)
+    end
+    it "should parse the title" do
+      @feed.title.should == "Paul Dix Explains Nothing"
+    end
+    it "should parse the url" do
+      @feed.url.should == "http://www.pauldix.net/"
+    end
+    it "should parse the feed_url" do
+      @feed.feed_url.should == "http://feeds.feedburner.com/PaulDixExplainsNothing"
+    end
+    it "should parse entries" do
+      @feed.entries.size.should == 5
+    end
+  end
+end

data/spec/feedzirra/parser/atom_spec.rb ADDED Viewed

@@ -0,0 +1,43 @@
+require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
+# Specs for Feedzirra::Parser::Atom: format detection and the feed-level fields
+# parsed from the sample Atom feeds.
+describe Feedzirra::Parser::Atom do
+  # be_able_to_parse is RSpec's predicate matcher for able_to_parse?, called
+  # here on the parser class itself, so the group is labelled accordingly.
+  describe ".able_to_parse?" do
+    it "should return true for an atom feed" do
+      Feedzirra::Parser::Atom.should be_able_to_parse(sample_atom_feed)
+    end
+    it "should return false for an rdf feed" do
+      Feedzirra::Parser::Atom.should_not be_able_to_parse(sample_rdf_feed)
+    end
+  end
+  describe "parsing" do
+    before(:each) do
+      # Parse the fixture afresh for every example.
+      @feed = Feedzirra::Parser::Atom.parse(sample_atom_feed)
+    end
+    it "should parse the title" do
+      @feed.title.should == "Amazon Web Services Blog"
+    end
+    it "should parse the url" do
+      @feed.url.should == "http://aws.typepad.com/aws/"
+    end
+    # The next two examples parse a second fixture whose link tags carry no
+    # type attribute.
+    it "should parse the url even when it doesn't have the type='text/html' attribute" do
+      Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).url.should == "http://www.innoq.com/planet/"
+    end
+    it "should parse the feed_url even when it doesn't have the type='application/atom+xml' attribute" do
+      Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).feed_url.should == "http://www.innoq.com/planet/atom.xml"
+    end
+    it "should parse the feed_url" do
+      @feed.feed_url.should == "http://aws.typepad.com/aws/atom.xml"
+    end
+    it "should parse entries" do
+      @feed.entries.size.should == 10
+    end
+  end
+end

data/spec/feedzirra/parser/itunes_rss_item_spec.rb ADDED Viewed

@@ -0,0 +1,48 @@
+require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
+# Specs for the fields Feedzirra::Parser::ITunesRSSItem exposes for the first
+# item of the sample iTunes feed.
+describe Feedzirra::Parser::ITunesRSSItem do
+  before(:each) do
+    # The item under test is taken from a fully parsed iTunes feed instead of
+    # giving item XML to ITunesRSSItem on its own. That makes these examples
+    # depend on ITunesRSS as well, but it is how items are produced in practice.
+    parsed_feed = Feedzirra::Parser::ITunesRSS.parse(sample_itunes_feed)
+    @episode = parsed_feed.entries.first
+  end
+  it "should parse the title" do
+    @episode.title.should == "Shake Shake Shake Your Spices"
+  end
+  it "should parse the author" do
+    @episode.itunes_author.should == "John Doe"
+  end
+  it "should parse the subtitle" do
+    @episode.itunes_subtitle.should == "A short primer on table spices"
+  end
+  it "should parse the summary" do
+    @episode.itunes_summary.should == "This week we talk about salt and pepper shakers, comparing and contrasting pour rates, construction materials, and overall aesthetics. Come and join the party!"
+  end
+  it "should parse the enclosure" do
+    # Length is compared as a string, exactly as the parser hands it back.
+    @episode.enclosure_length.should == "8727310"
+    @episode.enclosure_type.should == "audio/x-m4a"
+    @episode.enclosure_url.should == "http://example.com/podcasts/everything/AllAboutEverythingEpisode3.m4a"
+  end
+  it "should parse the guid" do
+    @episode.guid.should == "http://example.com/podcasts/archive/aae20050615.m4a"
+  end
+  it "should parse the published date" do
+    @episode.published.should == "Wed, 15 Jun 2005 19:00:00 GMT"
+  end
+  it "should parse the duration" do
+    @episode.itunes_duration.should == "7:04"
+  end
+  it "should parse the keywords" do
+    @episode.itunes_keywords.should == "salt, pepper, shaker, exciting"
+  end
+end

data/spec/feedzirra/parser/itunes_rss_owner_spec.rb ADDED Viewed

@@ -0,0 +1,18 @@
+require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
+# Specs for the name and email Feedzirra::Parser::ITunesRSSOwner exposes for
+# the first owner of the sample iTunes feed.
+describe Feedzirra::Parser::ITunesRSSOwner do
+  before(:each) do
+    # The owner under test is taken from a fully parsed iTunes feed instead of
+    # giving owner XML to ITunesRSSOwner on its own. That makes these examples
+    # depend on ITunesRSS as well, but it is how owners are produced in practice.
+    parsed_feed = Feedzirra::Parser::ITunesRSS.parse(sample_itunes_feed)
+    @first_owner = parsed_feed.itunes_owners.first
+  end
+  it "should parse the name" do
+    @first_owner.name.should == "John Doe"
+  end
+  it "should parse the email" do
+    @first_owner.email.should == "john.doe@example.com"
+  end
+end

data/spec/feedzirra/parser/itunes_rss_spec.rb ADDED Viewed

@@ -0,0 +1,50 @@
+require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
+# Specs for Feedzirra::Parser::ITunesRSS: format detection and the feed-level
+# iTunes fields parsed from the sample iTunes feed.
+describe Feedzirra::Parser::ITunesRSS do
+  # be_able_to_parse is RSpec's predicate matcher for able_to_parse?, called
+  # here on the parser class itself, so the group is labelled accordingly.
+  describe ".able_to_parse?" do
+    it "should return true for an itunes RSS feed" do
+      Feedzirra::Parser::ITunesRSS.should be_able_to_parse(sample_itunes_feed)
+    end
+    it "should return false for an atom feed" do
+      Feedzirra::Parser::ITunesRSS.should_not be_able_to_parse(sample_atom_feed)
+    end
+  end
+  describe "parsing" do
+    before(:each) do
+      # Parse the fixture afresh for every example.
+      @feed = Feedzirra::Parser::ITunesRSS.parse(sample_itunes_feed)
+    end
+    it "should parse the subtitle" do
+      @feed.itunes_subtitle.should == "A show about everything"
+    end
+    it "should parse the author" do
+      @feed.itunes_author.should == "John Doe"
+    end
+    it "should parse an owner" do
+      @feed.itunes_owners.size.should == 1
+    end
+    it "should parse an image" do
+      @feed.itunes_image.should == "http://example.com/podcasts/everything/AllAboutEverything.jpg"
+    end
+    it "should parse categories" do
+      # Every comparison goes through `.should`: a bare `==` is evaluated and
+      # thrown away, so the example would pass whatever was parsed.
+      @feed.itunes_categories.size.should == 3
+      @feed.itunes_categories[0].should == "Technology"
+      @feed.itunes_categories[1].should == "Gadgets"
+      # NOTE(review): the expected value is entity-escaped ("&amp;"); confirm
+      # that this is what the parser returns for itunes.xml.
+      @feed.itunes_categories[2].should == "TV &amp; Film"
+    end
+    it "should parse the summary" do
+      @feed.itunes_summary.should == "All About Everything is a show about everything. Each week we dive into any subject known to man and talk about it as much as we can. Look for our Podcast in the iTunes Music Store"
+    end
+    it "should parse entries" do
+      @feed.entries.size.should == 3
+    end
+  end
+end

data/spec/feedzirra/parser/rss_entry_spec.rb ADDED Viewed

@@ -0,0 +1,41 @@
+require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
+# Specs for the fields Feedzirra::Parser::RSSEntry exposes for the first entry
+# of the sample RSS feed.
+describe Feedzirra::Parser::RSSEntry do
+  before(:each) do
+    # The entry under test is taken from a fully parsed RSS feed instead of
+    # giving entry XML to RSSEntry on its own. That makes these examples depend
+    # on RSS as well, but it is how entries are produced in practice.
+    parsed_feed = Feedzirra::Parser::RSS.parse(sample_rss_feed)
+    @first_entry = parsed_feed.entries.first
+  end
+  it "should parse the title" do
+    @first_entry.title.should == "Nokogiri’s Slop Feature"
+  end
+  it "should parse the url" do
+    @first_entry.url.should == "http://tenderlovemaking.com/2008/12/04/nokogiris-slop-feature/"
+  end
+  it "should parse the author" do
+    @first_entry.author.should == "Aaron Patterson"
+  end
+  it "should parse the content" do
+    @first_entry.content.should == sample_rss_entry_content
+  end
+  it "should provide a summary" do
+    # The expected text keeps the numeric character references as they are.
+    @first_entry.summary.should == "Oops!  When I released nokogiri version 1.0.7, I totally forgot to talk about Nokogiri::Slop() feature that was added.  Why is it called \"slop\"?  It lets you sloppily explore documents.  Basically, it decorates your document with method_missing() that allows you to search your document via method calls.\nGiven this document:\n\ndoc = Nokogiri::Slop&#40;&#60;&#60;-eohtml&#41;\n&#60;html&#62;\n&#160; &#60;body&#62;\n&#160; [...]"
+  end
+  it "should parse the published date" do
+    @first_entry.published.to_s.should == "Thu Dec 04 17:17:49 UTC 2008"
+  end
+  it "should parse the categories" do
+    @first_entry.categories.should == ['computadora', 'nokogiri', 'rails']
+  end
+  it "should parse the guid as id" do
+    @first_entry.id.should == "http://tenderlovemaking.com/?p=198"
+  end
+end

data/spec/feedzirra/parser/rss_spec.rb ADDED Viewed

@@ -0,0 +1,45 @@
+require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
+# Specs for Feedzirra::Parser::RSS: format detection and the feed-level fields
+# parsed from the sample RSS feed.
+describe Feedzirra::Parser::RSS do
+  # be_able_to_parse is RSpec's predicate matcher for able_to_parse?, called
+  # here on the parser class itself, so the group is labelled accordingly.
+  describe ".able_to_parse?" do
+    it "should return true for an RSS feed" do
+      Feedzirra::Parser::RSS.should be_able_to_parse(sample_rss_feed)
+    end
+    # this is no longer true. combined rdf and rss into one
+    # it "should return false for an rdf feed" do
+    #   Feedzirra::RSS.should_not be_able_to_parse(sample_rdf_feed)
+    # end
+    it "should return false for an atom feed" do
+      Feedzirra::Parser::RSS.should_not be_able_to_parse(sample_atom_feed)
+    end
+  end
+  describe "parsing" do
+    before(:each) do
+      # Parse the fixture afresh for every example.
+      @feed = Feedzirra::Parser::RSS.parse(sample_rss_feed)
+    end
+    it "should parse the title" do
+      @feed.title.should == "Tender Lovemaking"
+    end
+    it "should parse the description" do
+      @feed.description.should == 'The act of making love, tenderly.'
+    end
+    it "should parse the url" do
+      @feed.url.should == "http://tenderlovemaking.com"
+    end
+    it "should provide an accessor for the feed_url" do
+      # The respond_to matcher reports the missing method name on failure,
+      # where comparing respond_to? with true only reports "false".
+      @feed.should respond_to(:feed_url)
+      @feed.should respond_to(:feed_url=)
+    end
+    it "should parse entries" do
+      @feed.entries.size.should == 10
+    end
+  end
+end

data/spec/spec.opts ADDED Viewed

@@ -0,0 +1,2 @@
+--diff
+--color

data/spec/spec_helper.rb ADDED Viewed

@@ -0,0 +1,70 @@
+require "rubygems"
+require "spec"
+# gem install redgreen for colored test output
+begin require "redgreen" unless ENV['TM_CURRENT_LINE']; rescue LoadError; end
+path = File.expand_path(File.dirname(__FILE__) + "/../lib/")
+$LOAD_PATH.unshift(path) unless $LOAD_PATH.include?(path)
+require "lib/feedzirra"
+# Reads a fixture from the sample_feeds directory that sits next to this file.
+#
+# @param filename [String] file name inside sample_feeds
+# @return [String] the file's contents
+def load_sample(filename)
+  File.read("#{File.dirname(__FILE__)}/sample_feeds/#{filename}")
+end
+# Every helper below returns the contents of one named fixture. All of them go
+# through load_sample so the fixture location is defined in a single place.
+def sample_atom_feed
+  load_sample("AmazonWebServicesBlog.xml")
+end
+def sample_atom_entry_content
+  load_sample("AmazonWebServicesBlogFirstEntryContent.xml")
+end
+def sample_itunes_feed
+  load_sample("itunes.xml")
+end
+def sample_web_page
+  load_sample("BBCNewsWebPage.html")
+end
+def sample_web_page2
+  load_sample("avc.com.html")
+end
+def sample_problematic_parser_detection
+  load_sample("Independent.xml")
+end
+def sample_rdf_feed
+  load_sample("HREFConsideredHarmful.xml")
+end
+def sample_rdf_entry_content
+  load_sample("HREFConsideredHarmfulFirstEntry.xml")
+end
+def sample_rss_feed_burner_feed
+  load_sample("SamHarrisAuthorPhilosopherEssayistAtheist.xml")
+end
+def sample_rss_with_images
+  load_sample("MikeDoesTech.xml")
+end
+def sample_rss_feed
+  load_sample("TenderLovemaking.xml")
+end
+def sample_rss_entry_content
+  load_sample("TenderLovemakingFirstEntry.xml")
+end
+def sample_feedburner_atom_feed
+  load_sample("PaulDixExplainsNothing.xml")
+end
+def sample_feedburner_atom_entry_content
+  load_sample("PaulDixExplainsNothingFirstEntryContent.xml")
+end
+def sample_wfw_feed
+  load_sample("PaulDixExplainsNothingWFW.xml")
+end