feed_parser 0.3.2 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -23,7 +23,7 @@ Add to Gemfile
23
23
  # sanitizing a custom set of fields
24
24
  fp = FeedParser.new(:url => "http://example.com/feed/", :sanitizer => MyBestestSanitizer.new, :fields_to_sanitize => [:title, :content])
25
25
 
26
- # parse the feed
26
+ # retrieve the feed XML and parse it
27
27
  feed = fp.parse
28
28
 
29
29
  # using parsed feed in your code
@@ -42,7 +42,10 @@ If the fetched XML is not a valid RSS or an ATOM feed, a FeedParser::UnknownFeed
42
42
 
43
43
  ## Running tests
44
44
 
45
- Install dependencies by running `bundle install`.
45
+ Install dependencies:
46
+
47
+ $ gem install bundler
48
+ $ bundle install
46
49
 
47
50
  Run rspec tests:
48
51
 
data/Rakefile CHANGED
@@ -19,9 +19,17 @@ end
19
19
  desc "Default: Run specs"
20
20
  task :default => :spec
21
21
 
22
- namespace :spec do
23
- desc "Run tests with three major Ruby versions"
24
- task :rubies do
25
- system("rvm 1.8.7-p302@feed_parser,1.9.2-p0@feed_parser,1.9.3-p194@feed_parser do bundle exec rake spec")
22
+ namespace :rubies do
23
+ rvm_rubies_command = "rvm 1.8.7-p302@feed_parser,1.9.3-p194@feed_parser do"
24
+
25
+ desc "Update dependencies for all Ruby versions"
26
+ task :update_dependencies do
27
+ system("#{rvm_rubies_command} bundle install")
28
+ system("#{rvm_rubies_command} bundle update")
29
+ end
30
+
31
+ desc "Run tests with Ruby versions 1.8.7 and 1.9.3"
32
+ task :spec do
33
+ system("#{rvm_rubies_command} bundle exec rake spec")
26
34
  end
27
35
  end
data/feed_parser.gemspec CHANGED
@@ -7,19 +7,18 @@ require 'feed_parser'
7
7
  Gem::Specification.new do |s|
8
8
  s.name = 'feed_parser'
9
9
  s.version = FeedParser::VERSION
10
- s.platform = Gem::Platform::RUBY
11
10
  s.authors = ['Arttu Tervo']
12
11
  s.email = ['arttu.tervo@gmail.com']
13
- s.homepage = 'http://github.com/arttu/feed_parser'
12
+ s.homepage = 'https://github.com/arttu/feed_parser'
14
13
  s.summary = %q{Rss and Atom feed parser}
15
14
  s.description = %q{Rss and Atom feed parser with sanitizer support built on top of Nokogiri.}
16
15
 
17
16
  s.add_dependency 'nokogiri'
18
17
 
19
- s.add_development_dependency 'rspec-rails', '~> 2.6'
18
+ s.add_development_dependency 'rake', '>= 0.9'
19
+ s.add_development_dependency 'rspec', '>= 2.10'
20
20
 
21
21
  s.extra_rdoc_files = %w[README.md]
22
- s.require_paths = %w[lib]
23
22
 
24
23
  s.files = `git ls-files`.split("\n")
25
24
  s.test_files = `git ls-files -- spec/*`.split("\n")
data/lib/feed_parser.rb CHANGED
@@ -3,15 +3,16 @@ require 'nokogiri'
3
3
 
4
4
  class FeedParser
5
5
 
6
- VERSION = "0.3.2"
6
+ VERSION = "0.3.3"
7
7
 
8
8
  USER_AGENT = "Ruby / FeedParser gem"
9
9
 
10
10
  class FeedParser::UnknownFeedType < Exception ; end
11
+ class FeedParser::InvalidURI < Exception ; end
11
12
 
12
13
  def initialize(opts)
13
14
  @url = opts[:url]
14
- @http_options = opts[:http] || {}
15
+ @http_options = {"User-Agent" => FeedParser::USER_AGENT}.merge(opts[:http] || {})
15
16
  @@sanitizer = (opts[:sanitizer] || SelfSanitizer.new)
16
17
  @@fields_to_sanitize = (opts[:fields_to_sanitize] || [:content])
17
18
  self
@@ -26,7 +27,34 @@ class FeedParser
26
27
  end
27
28
 
28
29
  def parse
29
- @feed ||= Feed.new(@url, @http_options)
30
+ feed_xml = open_or_follow_redirect(@url)
31
+ @feed ||= Feed.new(feed_xml)
32
+ end
33
+
34
+ private
35
+
36
+ def open_or_follow_redirect(feed_url)
37
+ uri = URI.parse(feed_url)
38
+
39
+ if uri.userinfo
40
+ @http_options[:http_basic_authentication] = [uri.user, uri.password].compact
41
+ uri.userinfo = uri.user = uri.password = nil
42
+ end
43
+
44
+ @http_options[:redirect] = true if RUBY_VERSION >= '1.9'
45
+
46
+ if ['http', 'https'].include?(uri.scheme)
47
+ open(uri.to_s, @http_options)
48
+ else
49
+ raise FeedParser::InvalidURI.new("Only URIs with http or https protocol are supported")
50
+ end
51
+ rescue RuntimeError => ex
52
+ redirect_url = ex.to_s.split(" ").last
53
+ if URI.parse(feed_url).scheme == "http" && URI.parse(redirect_url).scheme == "https"
54
+ open_or_follow_redirect(redirect_url)
55
+ else
56
+ raise ex
57
+ end
30
58
  end
31
59
  end
32
60
 
@@ -2,10 +2,8 @@ class FeedParser
2
2
  class Feed
3
3
  attr_reader :type
4
4
 
5
- def initialize(feed_url, http_options = {})
6
- @http_options = http_options
7
- raw_feed = open_or_follow_redirect(feed_url)
8
- @feed = Nokogiri::XML(raw_feed)
5
+ def initialize(feed_xml)
6
+ @feed = Nokogiri::XML(feed_xml)
9
7
  @feed.remove_namespaces!
10
8
  @type = ((@feed.xpath('/rss')[0] && :rss) || (@feed.xpath('/feed')[0] && :atom))
11
9
  raise FeedParser::UnknownFeedType.new("Feed is not an RSS feed or an ATOM feed") unless @type
@@ -44,34 +42,5 @@ class FeedParser
44
42
  :items => items.map(&:as_json)
45
43
  }
46
44
  end
47
-
48
- private
49
-
50
- # Some feeds
51
- def open_or_follow_redirect(feed_url)
52
- parsed_url = URI.parse(feed_url)
53
-
54
- connection_options = {"User-Agent" => FeedParser::USER_AGENT}
55
- connection_options.merge!(@http_options)
56
- if parsed_url.userinfo
57
- connection_options[:http_basic_authentication] = [parsed_url.user, parsed_url.password].compact
58
- parsed_url.userinfo = parsed_url.user = parsed_url.password = nil
59
- end
60
-
61
- connection_options[:redirect] = true if RUBY_VERSION >= '1.9'
62
-
63
- if parsed_url.scheme
64
- open(parsed_url.to_s, connection_options)
65
- else
66
- open(parsed_url.to_s)
67
- end
68
- rescue RuntimeError => ex
69
- redirect_url = ex.to_s.split(" ").last
70
- if URI.split(feed_url).first == "http" && URI.split(redirect_url).first == "https"
71
- open_or_follow_redirect(redirect_url)
72
- else
73
- raise ex
74
- end
75
- end
76
45
  end
77
46
  end
@@ -20,96 +20,104 @@ describe FeedParser do
20
20
 
21
21
  describe "#new" do
22
22
  it "should forward given http options to the OpenURI" do
23
- FeedParser::Feed.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE)).and_return(feed_xml)
23
+ FeedParser.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE)).and_return(feed_xml)
24
24
  fp = FeedParser.new(:url => "http://blog.example.com/feed/", :http => {:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE})
25
25
  fp.parse
26
26
  end
27
- end
28
27
 
29
- describe FeedParser::Feed, "#new" do
30
28
  it "should fetch a feed by url" do
31
- FeedParser::Feed.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options).and_return(feed_xml)
32
- FeedParser::Feed.new("http://blog.example.com/feed/")
29
+ FeedParser.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options).and_return(feed_xml)
30
+ fp = FeedParser.new({:url => "http://blog.example.com/feed/"}.merge(http_connection_options))
31
+ fp.parse
33
32
  end
34
33
 
35
34
  it "should fetch a feed using basic auth if auth embedded to the url" do
36
- FeedParser::Feed.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:http_basic_authentication => ["user", "pass"])).and_return(feed_xml)
37
- FeedParser::Feed.new("http://user:pass@blog.example.com/feed/")
35
+ FeedParser.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:http_basic_authentication => ["user", "pass"])).and_return(feed_xml)
36
+ fp = FeedParser.new({:url => "http://user:pass@blog.example.com/feed/"}.merge(http_connection_options))
37
+ fp.parse
38
38
  end
39
39
 
40
40
  it "should fetch a feed with only a user name embedded to the url" do
41
- FeedParser::Feed.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:http_basic_authentication => ["user"])).and_return(feed_xml)
42
- FeedParser::Feed.new("http://user@blog.example.com/feed/")
41
+ FeedParser.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:http_basic_authentication => ["user"])).and_return(feed_xml)
42
+ fp = FeedParser.new({:url => "http://user@blog.example.com/feed/"}.merge(http_connection_options))
43
+ fp.parse
43
44
  end
44
45
 
45
- it "should follow redirect based on the exception message" do
46
- FeedParser::Feed.any_instance.should_receive(:open).with("http://example.com/feed", http_connection_options).and_raise(RuntimeError.new("redirection forbidden: http://example.com/feed -> https://example.com/feed"))
47
- FeedParser::Feed.any_instance.should_receive(:open).with("https://example.com/feed", http_connection_options).and_return(feed_xml)
48
- FeedParser::Feed.new("http://example.com/feed")
46
+ it "should follow redirect based on the exception message (even if OpenURI don't want to do it)" do
47
+ FeedParser.any_instance.should_receive(:open).with("http://example.com/feed", http_connection_options).and_raise(RuntimeError.new("redirection forbidden: http://example.com/feed -> https://example.com/feed"))
48
+ FeedParser.any_instance.should_receive(:open).with("https://example.com/feed", http_connection_options).and_return(feed_xml)
49
+ fp = FeedParser.new({:url => "http://example.com/feed"}.merge(http_connection_options))
50
+ fp.parse
49
51
  end
50
52
 
51
- it "should not follow redirect from secure connection to non-secure one" do
52
- FeedParser::Feed.any_instance.should_receive(:open).with("https://example.com/feed", http_connection_options).and_raise(RuntimeError.new("redirection forbidden: https://example.com/feed -> http://example.com/feed"))
53
- FeedParser::Feed.any_instance.should_not_receive(:open).with("http://example.com/feed", http_connection_options)
53
+ it "should not follow redirect from a secure connection to a non-secure one" do
54
+ FeedParser.any_instance.should_receive(:open).with("https://example.com/feed", http_connection_options).and_raise(RuntimeError.new("redirection forbidden: https://example.com/feed -> http://example.com/feed"))
55
+ FeedParser.any_instance.should_not_receive(:open).with("http://example.com/feed", http_connection_options)
54
56
  lambda {
55
- FeedParser::Feed.new("https://example.com/feed")
57
+ fp = FeedParser.new({:url => "https://example.com/feed"}.merge(http_connection_options))
58
+ fp.parse
56
59
  }.should raise_error(RuntimeError, "redirection forbidden: https://example.com/feed -> http://example.com/feed")
57
60
  end
58
61
 
59
- it "should use alternate url if there is no valid self url in the received feed xml" do
60
- FeedParser::Feed.any_instance.should_receive(:open).with("https://developers.facebook.com/blog/feed", http_connection_options).and_return(feed_xml('facebook.atom.xml'))
61
- lambda {
62
- feed = FeedParser::Feed.new("https://developers.facebook.com/blog/feed")
63
- feed.url.should == "https://developers.facebook.com/blog/feed"
64
- }.should_not raise_error
65
- end
66
-
67
62
  it "should raise an error unless retrieved XML is not an RSS or an ATOM feed" do
68
- FeedParser::Feed.any_instance.should_receive(:open).with("http://example.com/blog/feed/invalid.xml", http_connection_options).and_return("foo bar")
63
+ FeedParser.any_instance.should_receive(:open).with("http://example.com/blog/feed/invalid.xml", http_connection_options).and_return("foo bar")
69
64
  lambda {
70
- FeedParser::Feed.new("http://example.com/blog/feed/invalid.xml")
65
+ fp = FeedParser.new({:url => "http://example.com/blog/feed/invalid.xml"}.merge(http_connection_options))
66
+ fp.parse
71
67
  }.should raise_error(FeedParser::UnknownFeedType, "Feed is not an RSS feed or an ATOM feed")
72
68
  end
73
- end
74
69
 
75
- describe "#parse" do
76
- shared_examples_for "feed parser" do
77
- it "should not fail" do
78
- lambda {
79
- @feed = @feed_parser.parse
80
- }.should_not raise_error
81
- end
82
-
83
- it "should populate every item" do
84
- @feed = @feed_parser.parse
85
- @feed.items.each do |item|
86
- [:guid, :link, :title, :categories, :author, :content].each do |attribute|
87
- item.send(attribute).should_not be_nil
88
- item.send(attribute).should_not be_empty
89
- end
90
- end
91
- end
70
+ it "should not allow feeds without http(s) protocol" do
71
+ lambda {
72
+ fp = FeedParser.new({:url => "feed://example.com/feed"}.merge(http_connection_options))
73
+ fp.parse
74
+ }.should raise_error(FeedParser::InvalidURI, "Only URIs with http or https protocol are supported")
92
75
  end
76
+ end
93
77
 
94
- def case_tester(test_cases)
78
+ describe "::Feed" do
79
+ def case_tester(feed, test_cases)
95
80
  test_cases.each do |test_case|
96
81
  if test_case.last.is_a?(Array)
97
82
  test_case.last.each do |_case|
98
- @feed.as_json[test_case.first].should include(_case)
83
+ feed.as_json[test_case.first].should include(_case)
99
84
  end
100
85
  else
101
- @feed.send(test_case.first).should include(test_case.last)
86
+ feed.send(test_case.first).should include(test_case.last)
102
87
  end
103
88
  end
104
89
  end
105
90
 
91
+ describe "sanitizer" do
92
+ it "should sanitize with custom sanitizer" do
93
+ FeedParser.new(:url => "https://example.com/feed", :sanitizer => NotSaneSanitizer.new)
94
+
95
+ feed = FeedParser::Feed.new(feed_xml('sanitize.me.rss.xml'))
96
+ feed.items.first.content.should_not =~ (/flowdock/i)
97
+ end
98
+
99
+ it "should sanitize custom fields" do
100
+ FeedParser.new(:url => "https://example.com/feed", :sanitizer => NotSaneSanitizer.new, :fields_to_sanitize => [:author, :content])
101
+
102
+ feed = FeedParser::Feed.new(feed_xml('sanitize.me.rss.xml'))
103
+ feed.items.first.author.should == 'Sanitized'
104
+ end
105
+ end
106
+
106
107
  describe "rss feeds" do
107
- before :each do
108
- @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', 'nodeta.rss.xml'))
108
+ it "should be an rss feed" do
109
+ feed = FeedParser::Feed.new(feed_xml('nodeta.rss.xml'))
110
+ feed.type.should == :rss
109
111
  end
110
112
 
111
- after :each do
112
- @feed.type.should == :rss
113
+ it "should populate every item" do
114
+ feed = FeedParser::Feed.new(feed_xml('nodeta.rss.xml'))
115
+ feed.items.each do |item|
116
+ [:guid, :link, :title, :categories, :author, :content].each do |attribute|
117
+ item.send(attribute).should_not be_nil
118
+ item.send(attribute).should_not be_empty
119
+ end
120
+ end
113
121
  end
114
122
 
115
123
  {
@@ -165,40 +173,27 @@ describe FeedParser do
165
173
  },
166
174
  }.each do |rss_fixture, test_cases|
167
175
  it "should parse #{rss_fixture}" do
168
- @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', rss_fixture))
169
-
170
- @feed = @feed_parser.parse
176
+ feed = FeedParser::Feed.new(feed_xml(rss_fixture))
171
177
 
172
- case_tester(test_cases)
178
+ case_tester(feed, test_cases)
173
179
  end
174
180
  end
175
-
176
- it "should sanitize with custom sanitizer" do
177
- @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', 'sanitize.me.rss.xml'), :sanitizer => NotSaneSanitizer.new)
178
-
179
- @feed = @feed_parser.parse
180
-
181
- @feed.items.first.content.should_not =~ (/flowdock/i)
182
- end
183
-
184
- it "should sanitize custom fields" do
185
- @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', 'sanitize.me.rss.xml'), :sanitizer => NotSaneSanitizer.new, :fields_to_sanitize => [:author, :content])
186
-
187
- @feed = @feed_parser.parse
188
-
189
- @feed.items.first.author.should == 'Sanitized'
190
- end
191
-
192
- it_should_behave_like "feed parser"
193
181
  end
194
182
 
195
183
  describe "atom feeds" do
196
- before :each do
197
- @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', 'smashingmagazine.atom.xml'))
184
+ it "should be an atom feed" do
185
+ feed = FeedParser::Feed.new(feed_xml('smashingmagazine.atom.xml'))
186
+ feed.type.should == :atom
198
187
  end
199
188
 
200
- after :each do
201
- @feed.type.should == :atom
189
+ it "should populate every item" do
190
+ feed = FeedParser::Feed.new(feed_xml('smashingmagazine.atom.xml'))
191
+ feed.items.each do |item|
192
+ [:guid, :link, :title, :categories, :author, :content].each do |attribute|
193
+ item.send(attribute).should_not be_nil
194
+ item.send(attribute).should_not be_empty
195
+ end
196
+ end
202
197
  end
203
198
 
204
199
  {
@@ -230,15 +225,18 @@ describe FeedParser do
230
225
  }
231
226
  }.each do |atom_fixture, test_cases|
232
227
  it "should parse #{atom_fixture}" do
233
- @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', atom_fixture))
234
-
235
- @feed = @feed_parser.parse
228
+ feed = FeedParser::Feed.new(feed_xml(atom_fixture))
236
229
 
237
- case_tester(test_cases)
230
+ case_tester(feed, test_cases)
238
231
  end
239
232
  end
240
233
 
241
- it_should_behave_like "feed parser"
234
+ it "should use alternate url if there is no valid self url in the received feed xml" do
235
+ lambda {
236
+ feed = FeedParser::Feed.new(feed_xml('facebook.atom.xml'))
237
+ feed.url.should == "https://developers.facebook.com/blog/feed"
238
+ }.should_not raise_error
239
+ end
242
240
  end
243
241
  end
244
242
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feed_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2012-06-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &2153328640 !ruby/object:Gem::Requirement
16
+ requirement: &2153222140 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,18 +21,29 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2153328640
24
+ version_requirements: *2153222140
25
25
  - !ruby/object:Gem::Dependency
26
- name: rspec-rails
27
- requirement: &2153383440 !ruby/object:Gem::Requirement
26
+ name: rake
27
+ requirement: &2153221640 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
- - - ~>
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0.9'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *2153221640
36
+ - !ruby/object:Gem::Dependency
37
+ name: rspec
38
+ requirement: &2153221140 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
31
42
  - !ruby/object:Gem::Version
32
- version: '2.6'
43
+ version: '2.10'
33
44
  type: :development
34
45
  prerelease: false
35
- version_requirements: *2153383440
46
+ version_requirements: *2153221140
36
47
  description: Rss and Atom feed parser with sanitizer support built on top of Nokogiri.
37
48
  email:
38
49
  - arttu.tervo@gmail.com
@@ -62,7 +73,7 @@ files:
62
73
  - spec/fixtures/sanitize.me.rss.xml
63
74
  - spec/fixtures/scrumalliance.rss.xml
64
75
  - spec/fixtures/smashingmagazine.atom.xml
65
- homepage: http://github.com/arttu/feed_parser
76
+ homepage: https://github.com/arttu/feed_parser
66
77
  licenses: []
67
78
  post_install_message:
68
79
  rdoc_options: []