RubyGems - feed_parser - Versions diffs - 0.3.2 → 0.3.3 - Mend

feed_parser 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

data/README.md CHANGED Viewed

@@ -23,7 +23,7 @@ Add to Gemfile
     # sanitizing custom field set
     fp = FeedParser.new(:url => "http://example.com/feed/", :sanitizer => MyBestestSanitizer.new, :fields_to_sanitize => [:title, :content])
-    # parse the feed
+    # retrieve the feed xml and parse it
     feed = fp.parse
     # using parsed feed in your code
@@ -42,7 +42,10 @@ If the fetched XML is not a valid RSS or an ATOM feed, a FeedParser::UnknownFeed
 ## Running tests
-Install dependencies by running `bundle install`.
+Install dependencies:
+    $ gem install bundler
+    $ bundle install
 Run rspec tests:

data/Rakefile CHANGED Viewed

@@ -19,9 +19,17 @@ end
 desc "Default: Run specs"
 task :default => :spec
-namespace :spec do
-  desc "Run tests with three major Ruby versions"
-  task :rubies do
-    system("rvm 1.8.7-p302@feed_parser,1.9.2-p0@feed_parser,1.9.3-p194@feed_parser do bundle exec rake spec")
+namespace :rubies do
+  rvm_rubies_command = "rvm 1.8.7-p302@feed_parser,1.9.3-p194@feed_parser do"
+  desc "Update dependencies for all Ruby versions"
+  task :update_dependencies do
+    system("#{rvm_rubies_command} bundle install")
+    system("#{rvm_rubies_command} bundle update")
+  end
+  desc "Run tests with Ruby versions 1.8.7 and 1.9.3"
+  task :spec do
+    system("#{rvm_rubies_command} bundle exec rake spec")
   end
 end

data/feed_parser.gemspec CHANGED Viewed

@@ -7,19 +7,18 @@ require 'feed_parser'
 Gem::Specification.new do |s|
   s.name        = 'feed_parser'
   s.version     = FeedParser::VERSION
-  s.platform    = Gem::Platform::RUBY
   s.authors     = ['Arttu Tervo']
   s.email       = ['arttu.tervo@gmail.com']
-  s.homepage    = 'http://github.com/arttu/feed_parser'
+  s.homepage    = 'https://github.com/arttu/feed_parser'
   s.summary     = %q{Rss and Atom feed parser}
   s.description = %q{Rss and Atom feed parser with sanitizer support built on top of Nokogiri.}
   s.add_dependency 'nokogiri'
-  s.add_development_dependency 'rspec-rails', '~> 2.6'
+  s.add_development_dependency 'rake', '>= 0.9'
+  s.add_development_dependency 'rspec', '>= 2.10'
   s.extra_rdoc_files = %w[README.md]
-  s.require_paths = %w[lib]
   s.files         = `git ls-files`.split("\n")
   s.test_files    = `git ls-files -- spec/*`.split("\n")

data/lib/feed_parser.rb CHANGED Viewed

@@ -3,15 +3,16 @@ require 'nokogiri'
 class FeedParser
-  VERSION = "0.3.2"
+  VERSION = "0.3.3"
   USER_AGENT = "Ruby / FeedParser gem"
   class FeedParser::UnknownFeedType < Exception ; end
+  class FeedParser::InvalidURI < Exception ; end
   def initialize(opts)
     @url = opts[:url]
-    @http_options = opts[:http] || {}
+    @http_options = {"User-Agent" => FeedParser::USER_AGENT}.merge(opts[:http] || {})
     @@sanitizer = (opts[:sanitizer] || SelfSanitizer.new)
     @@fields_to_sanitize = (opts[:fields_to_sanitize] || [:content])
     self
@@ -26,7 +27,34 @@ class FeedParser
   end
   def parse
-    @feed ||= Feed.new(@url, @http_options)
+    feed_xml = open_or_follow_redirect(@url)
+    @feed ||= Feed.new(feed_xml)
+  end
+  private
+  def open_or_follow_redirect(feed_url)
+    uri = URI.parse(feed_url)
+    if uri.userinfo
+      @http_options[:http_basic_authentication] = [uri.user, uri.password].compact
+      uri.userinfo = uri.user = uri.password = nil
+    end
+    @http_options[:redirect] = true if RUBY_VERSION >= '1.9'
+    if ['http', 'https'].include?(uri.scheme)
+      open(uri.to_s, @http_options)
+    else
+      raise FeedParser::InvalidURI.new("Only URIs with http or https protocol are supported")
+    end
+  rescue RuntimeError => ex
+    redirect_url = ex.to_s.split(" ").last
+    if URI.parse(feed_url).scheme == "http" && URI.parse(redirect_url).scheme == "https"
+      open_or_follow_redirect(redirect_url)
+    else
+      raise ex
+    end
   end
 end

data/lib/feed_parser/feed.rb CHANGED Viewed

@@ -2,10 +2,8 @@ class FeedParser
   class Feed
     attr_reader :type
-    def initialize(feed_url, http_options = {})
-      @http_options = http_options
-      raw_feed = open_or_follow_redirect(feed_url)
-      @feed = Nokogiri::XML(raw_feed)
+    def initialize(feed_xml)
+      @feed = Nokogiri::XML(feed_xml)
       @feed.remove_namespaces!
       @type = ((@feed.xpath('/rss')[0] && :rss) || (@feed.xpath('/feed')[0] && :atom))
       raise FeedParser::UnknownFeedType.new("Feed is not an RSS feed or an ATOM feed") unless @type
@@ -44,34 +42,5 @@ class FeedParser
         :items => items.map(&:as_json)
       }
     end
-    private
-    # Some feeds
-    def open_or_follow_redirect(feed_url)
-      parsed_url = URI.parse(feed_url)
-      connection_options = {"User-Agent" => FeedParser::USER_AGENT}
-      connection_options.merge!(@http_options)
-      if parsed_url.userinfo
-        connection_options[:http_basic_authentication] = [parsed_url.user, parsed_url.password].compact
-        parsed_url.userinfo = parsed_url.user = parsed_url.password = nil
-      end
-      connection_options[:redirect] = true if RUBY_VERSION >= '1.9'
-      if parsed_url.scheme
-        open(parsed_url.to_s, connection_options)
-      else
-        open(parsed_url.to_s)
-      end
-    rescue RuntimeError => ex
-      redirect_url = ex.to_s.split(" ").last
-      if URI.split(feed_url).first == "http" && URI.split(redirect_url).first == "https"
-        open_or_follow_redirect(redirect_url)
-      else
-        raise ex
-      end
-    end
   end
 end

data/spec/feed_parser_spec.rb CHANGED Viewed

@@ -20,96 +20,104 @@ describe FeedParser do
   describe "#new" do
     it "should forward given http options to the OpenURI" do
-      FeedParser::Feed.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE)).and_return(feed_xml)
+      FeedParser.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE)).and_return(feed_xml)
       fp = FeedParser.new(:url => "http://blog.example.com/feed/", :http => {:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE})
       fp.parse
     end
-  end
-  describe FeedParser::Feed, "#new" do
     it "should fetch a feed by url" do
-      FeedParser::Feed.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options).and_return(feed_xml)
-      FeedParser::Feed.new("http://blog.example.com/feed/")
+      FeedParser.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options).and_return(feed_xml)
+      fp = FeedParser.new({:url => "http://blog.example.com/feed/"}.merge(http_connection_options))
+      fp.parse
     end
     it "should fetch a feed using basic auth if auth embedded to the url" do
-      FeedParser::Feed.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:http_basic_authentication => ["user", "pass"])).and_return(feed_xml)
-      FeedParser::Feed.new("http://user:pass@blog.example.com/feed/")
+      FeedParser.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:http_basic_authentication => ["user", "pass"])).and_return(feed_xml)
+      fp = FeedParser.new({:url => "http://user:pass@blog.example.com/feed/"}.merge(http_connection_options))
+      fp.parse
     end
     it "should fetch a feed with only a user name embedded to the url" do
-      FeedParser::Feed.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:http_basic_authentication => ["user"])).and_return(feed_xml)
-      FeedParser::Feed.new("http://user@blog.example.com/feed/")
+      FeedParser.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:http_basic_authentication => ["user"])).and_return(feed_xml)
+      fp = FeedParser.new({:url => "http://user@blog.example.com/feed/"}.merge(http_connection_options))
+      fp.parse
     end
-    it "should follow redirect based on the exception message" do
-      FeedParser::Feed.any_instance.should_receive(:open).with("http://example.com/feed", http_connection_options).and_raise(RuntimeError.new("redirection forbidden: http://example.com/feed -> https://example.com/feed"))
-      FeedParser::Feed.any_instance.should_receive(:open).with("https://example.com/feed", http_connection_options).and_return(feed_xml)
-      FeedParser::Feed.new("http://example.com/feed")
+    it "should follow redirect based on the exception message (even if OpenURI don't want to do it)" do
+      FeedParser.any_instance.should_receive(:open).with("http://example.com/feed", http_connection_options).and_raise(RuntimeError.new("redirection forbidden: http://example.com/feed -> https://example.com/feed"))
+      FeedParser.any_instance.should_receive(:open).with("https://example.com/feed", http_connection_options).and_return(feed_xml)
+      fp = FeedParser.new({:url => "http://example.com/feed"}.merge(http_connection_options))
+      fp.parse
     end
-    it "should not follow redirect from secure connection to non-secure one" do
-      FeedParser::Feed.any_instance.should_receive(:open).with("https://example.com/feed", http_connection_options).and_raise(RuntimeError.new("redirection forbidden: https://example.com/feed -> http://example.com/feed"))
-      FeedParser::Feed.any_instance.should_not_receive(:open).with("http://example.com/feed", http_connection_options)
+    it "should not follow redirect from a secure connection to a non-secure one" do
+      FeedParser.any_instance.should_receive(:open).with("https://example.com/feed", http_connection_options).and_raise(RuntimeError.new("redirection forbidden: https://example.com/feed -> http://example.com/feed"))
+      FeedParser.any_instance.should_not_receive(:open).with("http://example.com/feed", http_connection_options)
       lambda {
-        FeedParser::Feed.new("https://example.com/feed")
+        fp = FeedParser.new({:url => "https://example.com/feed"}.merge(http_connection_options))
+        fp.parse
       }.should raise_error(RuntimeError, "redirection forbidden: https://example.com/feed -> http://example.com/feed")
     end
-    it "should use alternate url if there is no valid self url in the received feed xml" do
-      FeedParser::Feed.any_instance.should_receive(:open).with("https://developers.facebook.com/blog/feed", http_connection_options).and_return(feed_xml('facebook.atom.xml'))
-      lambda {
-        feed = FeedParser::Feed.new("https://developers.facebook.com/blog/feed")
-        feed.url.should == "https://developers.facebook.com/blog/feed"
-      }.should_not raise_error
-    end
     it "should raise an error unless retrieved XML is not an RSS or an ATOM feed" do
-      FeedParser::Feed.any_instance.should_receive(:open).with("http://example.com/blog/feed/invalid.xml", http_connection_options).and_return("foo bar")
+      FeedParser.any_instance.should_receive(:open).with("http://example.com/blog/feed/invalid.xml", http_connection_options).and_return("foo bar")
       lambda {
-        FeedParser::Feed.new("http://example.com/blog/feed/invalid.xml")
+        fp = FeedParser.new({:url => "http://example.com/blog/feed/invalid.xml"}.merge(http_connection_options))
+        fp.parse
       }.should raise_error(FeedParser::UnknownFeedType, "Feed is not an RSS feed or an ATOM feed")
     end
-  end
-  describe "#parse" do
-    shared_examples_for "feed parser" do
-      it "should not fail" do
-        lambda {
-          @feed = @feed_parser.parse
-        }.should_not raise_error
-      end
-      it "should populate every item" do
-        @feed = @feed_parser.parse
-        @feed.items.each do |item|
-          [:guid, :link, :title, :categories, :author, :content].each do |attribute|
-            item.send(attribute).should_not be_nil
-            item.send(attribute).should_not be_empty
-          end
-        end
-      end
+    it "should not allow feeds without http(s) protocol" do
+      lambda {
+        fp = FeedParser.new({:url => "feed://example.com/feed"}.merge(http_connection_options))
+        fp.parse
+      }.should raise_error(FeedParser::InvalidURI, "Only URIs with http or https protocol are supported")
     end
+  end
-    def case_tester(test_cases)
+  describe "::Feed" do
+    def case_tester(feed, test_cases)
       test_cases.each do |test_case|
         if test_case.last.is_a?(Array)
           test_case.last.each do |_case|
-            @feed.as_json[test_case.first].should include(_case)
+            feed.as_json[test_case.first].should include(_case)
           end
         else
-          @feed.send(test_case.first).should include(test_case.last)
+          feed.send(test_case.first).should include(test_case.last)
         end
       end
     end
+    describe "sanitizer" do
+      it "should sanitize with custom sanitizer" do
+        FeedParser.new(:url => "https://example.com/feed", :sanitizer => NotSaneSanitizer.new)
+        feed = FeedParser::Feed.new(feed_xml('sanitize.me.rss.xml'))
+        feed.items.first.content.should_not =~ (/flowdock/i)
+      end
+      it "should sanitize custom fields" do
+        FeedParser.new(:url => "https://example.com/feed", :sanitizer => NotSaneSanitizer.new, :fields_to_sanitize => [:author, :content])
+        feed = FeedParser::Feed.new(feed_xml('sanitize.me.rss.xml'))
+        feed.items.first.author.should == 'Sanitized'
+      end
+    end
     describe "rss feeds" do
-      before :each do
-        @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', 'nodeta.rss.xml'))
+      it "should be an rss feed" do
+        feed = FeedParser::Feed.new(feed_xml('nodeta.rss.xml'))
+        feed.type.should == :rss
       end
-      after :each do
-        @feed.type.should == :rss
+      it "should populate every item" do
+        feed = FeedParser::Feed.new(feed_xml('nodeta.rss.xml'))
+        feed.items.each do |item|
+          [:guid, :link, :title, :categories, :author, :content].each do |attribute|
+            item.send(attribute).should_not be_nil
+            item.send(attribute).should_not be_empty
+          end
+        end
       end
       {
@@ -165,40 +173,27 @@ describe FeedParser do
         },
       }.each do |rss_fixture, test_cases|
         it "should parse #{rss_fixture}" do
-          @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', rss_fixture))
-          @feed = @feed_parser.parse
+          feed = FeedParser::Feed.new(feed_xml(rss_fixture))
-          case_tester(test_cases)
+          case_tester(feed, test_cases)
         end
       end
-      it "should sanitize with custom sanitizer" do
-        @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', 'sanitize.me.rss.xml'), :sanitizer => NotSaneSanitizer.new)
-        @feed = @feed_parser.parse
-        @feed.items.first.content.should_not =~ (/flowdock/i)
-      end
-      it "should sanitize custom fields" do
-        @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', 'sanitize.me.rss.xml'), :sanitizer => NotSaneSanitizer.new, :fields_to_sanitize => [:author, :content])
-        @feed = @feed_parser.parse
-        @feed.items.first.author.should == 'Sanitized'
-      end
-      it_should_behave_like "feed parser"
     end
     describe "atom feeds" do
-      before :each do
-        @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', 'smashingmagazine.atom.xml'))
+      it "should be an atom feed" do
+        feed = FeedParser::Feed.new(feed_xml('smashingmagazine.atom.xml'))
+        feed.type.should == :atom
       end
-      after :each do
-        @feed.type.should == :atom
+      it "should populate every item" do
+        feed = FeedParser::Feed.new(feed_xml('smashingmagazine.atom.xml'))
+        feed.items.each do |item|
+          [:guid, :link, :title, :categories, :author, :content].each do |attribute|
+            item.send(attribute).should_not be_nil
+            item.send(attribute).should_not be_empty
+          end
+        end
       end
       {
@@ -230,15 +225,18 @@ describe FeedParser do
         }
       }.each do |atom_fixture, test_cases|
         it "should parse #{atom_fixture}" do
-          @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', atom_fixture))
-          @feed = @feed_parser.parse
+          feed = FeedParser::Feed.new(feed_xml(atom_fixture))
-          case_tester(test_cases)
+          case_tester(feed, test_cases)
         end
       end
-      it_should_behave_like "feed parser"
+      it "should use alternate url if there is no valid self url in the received feed xml" do
+        lambda {
+          feed = FeedParser::Feed.new(feed_xml('facebook.atom.xml'))
+          feed.url.should == "https://developers.facebook.com/blog/feed"
+        }.should_not raise_error
+      end
     end
   end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: feed_parser
 version: !ruby/object:Gem::Version
-  version: 0.3.2
+  version: 0.3.3
   prerelease:
 platform: ruby
 authors:
@@ -13,7 +13,7 @@ date: 2012-06-26 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
-  requirement: &2153328640 !ruby/object:Gem::Requirement
+  requirement: &2153222140 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -21,18 +21,29 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *2153328640
+  version_requirements: *2153222140
 - !ruby/object:Gem::Dependency
-  name: rspec-rails
-  requirement: &2153383440 !ruby/object:Gem::Requirement
+  name: rake
+  requirement: &2153221640 !ruby/object:Gem::Requirement
     none: false
     requirements:
-    - - ~>
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0.9'
+  type: :development
+  prerelease: false
+  version_requirements: *2153221640
+- !ruby/object:Gem::Dependency
+  name: rspec
+  requirement: &2153221140 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
       - !ruby/object:Gem::Version
-        version: '2.6'
+        version: '2.10'
   type: :development
   prerelease: false
-  version_requirements: *2153383440
+  version_requirements: *2153221140
 description: Rss and Atom feed parser with sanitizer support built on top of Nokogiri.
 email:
 - arttu.tervo@gmail.com
@@ -62,7 +73,7 @@ files:
 - spec/fixtures/sanitize.me.rss.xml
 - spec/fixtures/scrumalliance.rss.xml
 - spec/fixtures/smashingmagazine.atom.xml
-homepage: http://github.com/arttu/feed_parser
+homepage: https://github.com/arttu/feed_parser
 licenses: []
 post_install_message:
 rdoc_options: []