RubyGems - feed_parser - Versions diffs - 0.3.2 → 0.3.3 - Mend

feed_parser 0.3.2 → 0.3.3

Files changed (7) hide show

data/README.md CHANGED Viewed

@@ -23,7 +23,7 @@ Add to Gemfile
     # sanitizing custom field set
     fp = FeedParser.new(:url => "http://example.com/feed/", :sanitizer => MyBestestSanitizer.new, :fields_to_sanitize => [:title, :content])
-    # parse the feed
+    # retrieve the feed xml and parse it
     feed = fp.parse
     # using parsed feed in your code
@@ -42,7 +42,10 @@ If the fetched XML is not a valid RSS or an ATOM feed, a FeedParser::UnknownFeed
 ## Running tests
-Install dependencies by running `bundle install`.
+Install dependencies:
+    $ gem install bundler
+    $ bundle install
 Run rspec tests:

data/Rakefile CHANGED Viewed

@@ -19,9 +19,17 @@ end
 desc "Default: Run specs"
 task :default => :spec
-namespace :spec do
-  desc "Run tests with three major Ruby versions"
-  task :rubies do
-    system("rvm 1.8.7-p302@feed_parser,1.9.2-p0@feed_parser,1.9.3-p194@feed_parser do bundle exec rake spec")
+namespace :rubies do
+  rvm_rubies_command = "rvm 1.8.7-p302@feed_parser,1.9.3-p194@feed_parser do"
+  desc "Update dependencies for all Ruby versions"
+  task :update_dependencies do
+    system("#{rvm_rubies_command} bundle install")
+    system("#{rvm_rubies_command} bundle update")
+  end
+  desc "Run tests with Ruby versions 1.8.7 and 1.9.3"
+  task :spec do
+    system("#{rvm_rubies_command} bundle exec rake spec")
   end
 end

data/feed_parser.gemspec CHANGED Viewed

@@ -7,19 +7,18 @@ require 'feed_parser'
 Gem::Specification.new do |s|
   s.name        = 'feed_parser'
   s.version     = FeedParser::VERSION
-  s.platform    = Gem::Platform::RUBY
   s.authors     = ['Arttu Tervo']
   s.email       = ['arttu.tervo@gmail.com']
-  s.homepage    = 'http://github.com/arttu/feed_parser'
+  s.homepage    = 'https://github.com/arttu/feed_parser'
   s.summary     = %q{Rss and Atom feed parser}
   s.description = %q{Rss and Atom feed parser with sanitizer support built on top of Nokogiri.}
   s.add_dependency 'nokogiri'
-  s.add_development_dependency 'rspec-rails', '~> 2.6'
+  s.add_development_dependency 'rake', '>= 0.9'
+  s.add_development_dependency 'rspec', '>= 2.10'
   s.extra_rdoc_files = %w[README.md]
-  s.require_paths = %w[lib]
   s.files         = `git ls-files`.split("\n")
   s.test_files    = `git ls-files -- spec/*`.split("\n")

data/lib/feed_parser.rb CHANGED Viewed

@@ -3,15 +3,16 @@ require 'nokogiri'
 class FeedParser
-  VERSION = "0.3.2"
+  VERSION = "0.3.3"
   USER_AGENT = "Ruby / FeedParser gem"
   class FeedParser::UnknownFeedType < Exception ; end
+  class FeedParser::InvalidURI < Exception ; end
   def initialize(opts)
     @url = opts[:url]
-    @http_options = opts[:http] || {}
+    @http_options = {"User-Agent" => FeedParser::USER_AGENT}.merge(opts[:http] || {})
     @@sanitizer = (opts[:sanitizer] || SelfSanitizer.new)
     @@fields_to_sanitize = (opts[:fields_to_sanitize] || [:content])
     self
@@ -26,7 +27,34 @@ class FeedParser
   end
   def parse
-    @feed ||= Feed.new(@url, @http_options)
+    feed_xml = open_or_follow_redirect(@url)
+    @feed ||= Feed.new(feed_xml)
+  end
+  private
+  def open_or_follow_redirect(feed_url)
+    uri = URI.parse(feed_url)
+    if uri.userinfo
+      @http_options[:http_basic_authentication] = [uri.user, uri.password].compact
+      uri.userinfo = uri.user = uri.password = nil
+    end
+    @http_options[:redirect] = true if RUBY_VERSION >= '1.9'
+    if ['http', 'https'].include?(uri.scheme)
+      open(uri.to_s, @http_options)
+    else
+      raise FeedParser::InvalidURI.new("Only URIs with http or https protocol are supported")
+    end
+  rescue RuntimeError => ex
+    redirect_url = ex.to_s.split(" ").last
+    if URI.parse(feed_url).scheme == "http" && URI.parse(redirect_url).scheme == "https"
+      open_or_follow_redirect(redirect_url)
+    else
+      raise ex
+    end
   end
 end

data/lib/feed_parser/feed.rb CHANGED Viewed

@@ -2,10 +2,8 @@ class FeedParser
   class Feed
     attr_reader :type
-    def initialize(feed_url, http_options = {})
-      @http_options = http_options
-      raw_feed = open_or_follow_redirect(feed_url)
-      @feed = Nokogiri::XML(raw_feed)
+    def initialize(feed_xml)
+      @feed = Nokogiri::XML(feed_xml)
       @feed.remove_namespaces!
       @type = ((@feed.xpath('/rss')[0] && :rss) || (@feed.xpath('/feed')[0] && :atom))
       raise FeedParser::UnknownFeedType.new("Feed is not an RSS feed or an ATOM feed") unless @type
@@ -44,34 +42,5 @@ class FeedParser
         :items => items.map(&:as_json)
       }
     end
-    private
-    # Some feeds
-    def open_or_follow_redirect(feed_url)
-      parsed_url = URI.parse(feed_url)
-      connection_options = {"User-Agent" => FeedParser::USER_AGENT}
-      connection_options.merge!(@http_options)
-      if parsed_url.userinfo
-        connection_options[:http_basic_authentication] = [parsed_url.user, parsed_url.password].compact
-        parsed_url.userinfo = parsed_url.user = parsed_url.password = nil
-      end
-      connection_options[:redirect] = true if RUBY_VERSION >= '1.9'
-      if parsed_url.scheme
-        open(parsed_url.to_s, connection_options)
-      else
-        open(parsed_url.to_s)
-      end
-    rescue RuntimeError => ex
-      redirect_url = ex.to_s.split(" ").last
-      if URI.split(feed_url).first == "http" && URI.split(redirect_url).first == "https"
-        open_or_follow_redirect(redirect_url)
-      else
-        raise ex
-      end
-    end
   end
 end

data/spec/feed_parser_spec.rb CHANGED Viewed

@@ -20,96 +20,104 @@ describe FeedParser do
   describe "#new" do
     it "should forward given http options to the OpenURI" do
-      FeedParser::Feed.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE)).and_return(feed_xml)
+      FeedParser.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE)).and_return(feed_xml)
       fp = FeedParser.new(:url => "http://blog.example.com/feed/", :http => {:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE})
       fp.parse
     end
-  end
-  describe FeedParser::Feed, "#new" do
     it "should fetch a feed by url" do
-      FeedParser::Feed.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options).and_return(feed_xml)
-      FeedParser::Feed.new("http://blog.example.com/feed/")
+      FeedParser.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options).and_return(feed_xml)
+      fp = FeedParser.new({:url => "http://blog.example.com/feed/"}.merge(http_connection_options))
+      fp.parse
     end
     it "should fetch a feed using basic auth if auth embedded to the url" do
-      FeedParser::Feed.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:http_basic_authentication => ["user", "pass"])).and_return(feed_xml)
-      FeedParser::Feed.new("http://user:pass@blog.example.com/feed/")
+      FeedParser.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:http_basic_authentication => ["user", "pass"])).and_return(feed_xml)
+      fp = FeedParser.new({:url => "http://user:pass@blog.example.com/feed/"}.merge(http_connection_options))
+      fp.parse
     end
     it "should fetch a feed with only a user name embedded to the url" do
-      FeedParser::Feed.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:http_basic_authentication => ["user"])).and_return(feed_xml)
-      FeedParser::Feed.new("http://user@blog.example.com/feed/")
+      FeedParser.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:http_basic_authentication => ["user"])).and_return(feed_xml)
+      fp = FeedParser.new({:url => "http://user@blog.example.com/feed/"}.merge(http_connection_options))
+      fp.parse
     end
-    it "should follow redirect based on the exception message" do
-      FeedParser::Feed.any_instance.should_receive(:open).with("http://example.com/feed", http_connection_options).and_raise(RuntimeError.new("redirection forbidden: http://example.com/feed -> https://example.com/feed"))
-      FeedParser::Feed.any_instance.should_receive(:open).with("https://example.com/feed", http_connection_options).and_return(feed_xml)
-      FeedParser::Feed.new("http://example.com/feed")
+    it "should follow redirect based on the exception message (even if OpenURI don't want to do it)" do
+      FeedParser.any_instance.should_receive(:open).with("http://example.com/feed", http_connection_options).and_raise(RuntimeError.new("redirection forbidden: http://example.com/feed -> https://example.com/feed"))
+      FeedParser.any_instance.should_receive(:open).with("https://example.com/feed", http_connection_options).and_return(feed_xml)
+      fp = FeedParser.new({:url => "http://example.com/feed"}.merge(http_connection_options))
+      fp.parse
     end
-    it "should not follow redirect from secure connection to non-secure one" do
-      FeedParser::Feed.any_instance.should_receive(:open).with("https://example.com/feed", http_connection_options).and_raise(RuntimeError.new("redirection forbidden: https://example.com/feed -> http://example.com/feed"))
-      FeedParser::Feed.any_instance.should_not_receive(:open).with("http://example.com/feed", http_connection_options)
+    it "should not follow redirect from a secure connection to a non-secure one" do
+      FeedParser.any_instance.should_receive(:open).with("https://example.com/feed", http_connection_options).and_raise(RuntimeError.new("redirection forbidden: https://example.com/feed -> http://example.com/feed"))
+      FeedParser.any_instance.should_not_receive(:open).with("http://example.com/feed", http_connection_options)
       lambda {
-        FeedParser::Feed.new("https://example.com/feed")
+        fp = FeedParser.new({:url => "https://example.com/feed"}.merge(http_connection_options))
+        fp.parse
       }.should raise_error(RuntimeError, "redirection forbidden: https://example.com/feed -> http://example.com/feed")
     end
-    it "should use alternate url if there is no valid self url in the received feed xml" do
-      FeedParser::Feed.any_instance.should_receive(:open).with("https://developers.facebook.com/blog/feed", http_connection_options).and_return(feed_xml('facebook.atom.xml'))
-      lambda {
-        feed = FeedParser::Feed.new("https://developers.facebook.com/blog/feed")
-        feed.url.should == "https://developers.facebook.com/blog/feed"
-      }.should_not raise_error
-    end
     it "should raise an error if retrieved XML is not an RSS or an ATOM feed" do
-      FeedParser::Feed.any_instance.should_receive(:open).with("http://example.com/blog/feed/invalid.xml", http_connection_options).and_return("foo bar")
+      FeedParser.any_instance.should_receive(:open).with("http://example.com/blog/feed/invalid.xml", http_connection_options).and_return("foo bar")
       lambda {
-        FeedParser::Feed.new("http://example.com/blog/feed/invalid.xml")
+        fp = FeedParser.new({:url => "http://example.com/blog/feed/invalid.xml"}.merge(http_connection_options))
+        fp.parse
       }.should raise_error(FeedParser::UnknownFeedType, "Feed is not an RSS feed or an ATOM feed")
     end
-  end
-  describe "#parse" do
-    shared_examples_for "feed parser" do
-      it "should not fail" do
-        lambda {
-          @feed = @feed_parser.parse
-        }.should_not raise_error
-      end
-      it "should populate every item" do
-        @feed = @feed_parser.parse
-        @feed.items.each do |item|
-          [:guid, :link, :title, :categories, :author, :content].each do |attribute|
-            item.send(attribute).should_not be_nil
-            item.send(attribute).should_not be_empty
-          end
-        end
-      end
+    it "should not allow feeds without http(s) protocol" do
+      lambda {
+        fp = FeedParser.new({:url => "feed://example.com/feed"}.merge(http_connection_options))
+        fp.parse
+      }.should raise_error(FeedParser::InvalidURI, "Only URIs with http or https protocol are supported")
     end
+  end
-    def case_tester(test_cases)
+  describe "::Feed" do
+    def case_tester(feed, test_cases)
       test_cases.each do |test_case|
         if test_case.last.is_a?(Array)
           test_case.last.each do |_case|
-            @feed.as_json[test_case.first].should include(_case)
+            feed.as_json[test_case.first].should include(_case)
           end
         else
-          @feed.send(test_case.first).should include(test_case.last)
+          feed.send(test_case.first).should include(test_case.last)
         end
       end
     end
+    describe "sanitizer" do
+      it "should sanitize with custom sanitizer" do
+        FeedParser.new(:url => "https://example.com/feed", :sanitizer => NotSaneSanitizer.new)
+        feed = FeedParser::Feed.new(feed_xml('sanitize.me.rss.xml'))
+        feed.items.first.content.should_not =~ (/flowdock/i)
+      end
+      it "should sanitize custom fields" do
+        FeedParser.new(:url => "https://example.com/feed", :sanitizer => NotSaneSanitizer.new, :fields_to_sanitize => [:author, :content])
+        feed = FeedParser::Feed.new(feed_xml('sanitize.me.rss.xml'))
+        feed.items.first.author.should == 'Sanitized'
+      end
+    end
     describe "rss feeds" do
-      before :each do
-        @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', 'nodeta.rss.xml'))
+      it "should be an rss feed" do
+        feed = FeedParser::Feed.new(feed_xml('nodeta.rss.xml'))
+        feed.type.should == :rss
       end
-      after :each do
-        @feed.type.should == :rss
+      it "should populate every item" do
+        feed = FeedParser::Feed.new(feed_xml('nodeta.rss.xml'))
+        feed.items.each do |item|
+          [:guid, :link, :title, :categories, :author, :content].each do |attribute|
+            item.send(attribute).should_not be_nil
+            item.send(attribute).should_not be_empty
+          end
+        end
       end
       {
@@ -165,40 +173,27 @@ describe FeedParser do
         },
       }.each do |rss_fixture, test_cases|
         it "should parse #{rss_fixture}" do
-          @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', rss_fixture))
-          @feed = @feed_parser.parse
+          feed = FeedParser::Feed.new(feed_xml(rss_fixture))
-          case_tester(test_cases)
+          case_tester(feed, test_cases)
         end
       end
-      it "should sanitize with custom sanitizer" do
-        @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', 'sanitize.me.rss.xml'), :sanitizer => NotSaneSanitizer.new)
-        @feed = @feed_parser.parse
-        @feed.items.first.content.should_not =~ (/flowdock/i)
-      end
-      it "should sanitize custom fields" do
-        @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', 'sanitize.me.rss.xml'), :sanitizer => NotSaneSanitizer.new, :fields_to_sanitize => [:author, :content])
-        @feed = @feed_parser.parse
-        @feed.items.first.author.should == 'Sanitized'
-      end
-      it_should_behave_like "feed parser"
     end
     describe "atom feeds" do
-      before :each do
-        @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', 'smashingmagazine.atom.xml'))
+      it "should be an atom feed" do
+        feed = FeedParser::Feed.new(feed_xml('smashingmagazine.atom.xml'))
+        feed.type.should == :atom
       end
-      after :each do
-        @feed.type.should == :atom
+      it "should populate every item" do
+        feed = FeedParser::Feed.new(feed_xml('smashingmagazine.atom.xml'))
+        feed.items.each do |item|
+          [:guid, :link, :title, :categories, :author, :content].each do |attribute|
+            item.send(attribute).should_not be_nil
+            item.send(attribute).should_not be_empty
+          end
+        end
       end
       {
@@ -230,15 +225,18 @@ describe FeedParser do
         }
       }.each do |atom_fixture, test_cases|
         it "should parse #{atom_fixture}" do
-          @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', atom_fixture))
-          @feed = @feed_parser.parse
+          feed = FeedParser::Feed.new(feed_xml(atom_fixture))
-          case_tester(test_cases)
+          case_tester(feed, test_cases)
         end
       end
-      it_should_behave_like "feed parser"
+      it "should use alternate url if there is no valid self url in the received feed xml" do
+        lambda {
+          feed = FeedParser::Feed.new(feed_xml('facebook.atom.xml'))
+          feed.url.should == "https://developers.facebook.com/blog/feed"
+        }.should_not raise_error
+      end
     end
   end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: feed_parser
 version: !ruby/object:Gem::Version
-  version: 0.3.2
+  version: 0.3.3
   prerelease:
 platform: ruby
 authors:
@@ -13,7 +13,7 @@ date: 2012-06-26 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
-  requirement: &2153328640 !ruby/object:Gem::Requirement
+  requirement: &2153222140 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -21,18 +21,29 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *2153328640
+  version_requirements: *2153222140
 - !ruby/object:Gem::Dependency
-  name: rspec-rails
-  requirement: &2153383440 !ruby/object:Gem::Requirement
+  name: rake
+  requirement: &2153221640 !ruby/object:Gem::Requirement
     none: false
     requirements:
-    - - ~>
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0.9'
+  type: :development
+  prerelease: false
+  version_requirements: *2153221640
+- !ruby/object:Gem::Dependency
+  name: rspec
+  requirement: &2153221140 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
       - !ruby/object:Gem::Version
-        version: '2.6'
+        version: '2.10'
   type: :development
   prerelease: false
-  version_requirements: *2153383440
+  version_requirements: *2153221140
 description: Rss and Atom feed parser with sanitizer support built on top of Nokogiri.
 email:
 - arttu.tervo@gmail.com
@@ -62,7 +73,7 @@ files:
 - spec/fixtures/sanitize.me.rss.xml
 - spec/fixtures/scrumalliance.rss.xml
 - spec/fixtures/smashingmagazine.atom.xml
-homepage: http://github.com/arttu/feed_parser
+homepage: https://github.com/arttu/feed_parser
 licenses: []
 post_install_message:
 rdoc_options: []