feed_parser 0.3.2 → 0.3.3

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
data/README.md CHANGED
@@ -23,7 +23,7 @@ Add to Gemfile
23
23
  # sanitizing custom field set
24
24
  fp = FeedParser.new(:url => "http://example.com/feed/", :sanitizer => MyBestestSanitizer.new, :fields_to_sanitize => [:title, :content])
25
25
 
26
- # parse the feed
26
+ # retrieve the feed xml and parse it
27
27
  feed = fp.parse
28
28
 
29
29
  # using parsed feed in your code
@@ -42,7 +42,10 @@ If the fetched XML is not a valid RSS or an ATOM feed, a FeedParser::UnknownFeed
42
42
 
43
43
  ## Running tests
44
44
 
45
- Install dependencies by running `bundle install`.
45
+ Install dependencies:
46
+
47
+ $ gem install bundler
48
+ $ bundle install
46
49
 
47
50
  Run rspec tests:
48
51
 
data/Rakefile CHANGED
@@ -19,9 +19,17 @@ end
19
19
  desc "Default: Run specs"
20
20
  task :default => :spec
21
21
 
22
- namespace :spec do
23
- desc "Run tests with three major Ruby versions"
24
- task :rubies do
25
- system("rvm 1.8.7-p302@feed_parser,1.9.2-p0@feed_parser,1.9.3-p194@feed_parser do bundle exec rake spec")
22
+ namespace :rubies do
23
+ rvm_rubies_command = "rvm 1.8.7-p302@feed_parser,1.9.3-p194@feed_parser do"
24
+
25
+ desc "Update dependencies for all Ruby versions"
26
+ task :update_dependencies do
27
+ system("#{rvm_rubies_command} bundle install")
28
+ system("#{rvm_rubies_command} bundle update")
29
+ end
30
+
31
+ desc "Run tests with Ruby versions 1.8.7 and 1.9.3"
32
+ task :spec do
33
+ system("#{rvm_rubies_command} bundle exec rake spec")
26
34
  end
27
35
  end
data/feed_parser.gemspec CHANGED
@@ -7,19 +7,18 @@ require 'feed_parser'
7
7
  Gem::Specification.new do |s|
8
8
  s.name = 'feed_parser'
9
9
  s.version = FeedParser::VERSION
10
- s.platform = Gem::Platform::RUBY
11
10
  s.authors = ['Arttu Tervo']
12
11
  s.email = ['arttu.tervo@gmail.com']
13
- s.homepage = 'http://github.com/arttu/feed_parser'
12
+ s.homepage = 'https://github.com/arttu/feed_parser'
14
13
  s.summary = %q{Rss and Atom feed parser}
15
14
  s.description = %q{Rss and Atom feed parser with sanitizer support built on top of Nokogiri.}
16
15
 
17
16
  s.add_dependency 'nokogiri'
18
17
 
19
- s.add_development_dependency 'rspec-rails', '~> 2.6'
18
+ s.add_development_dependency 'rake', '>= 0.9'
19
+ s.add_development_dependency 'rspec', '>= 2.10'
20
20
 
21
21
  s.extra_rdoc_files = %w[README.md]
22
- s.require_paths = %w[lib]
23
22
 
24
23
  s.files = `git ls-files`.split("\n")
25
24
  s.test_files = `git ls-files -- spec/*`.split("\n")
data/lib/feed_parser.rb CHANGED
@@ -3,15 +3,16 @@ require 'nokogiri'
3
3
 
4
4
  class FeedParser
5
5
 
6
- VERSION = "0.3.2"
6
+ VERSION = "0.3.3"
7
7
 
8
8
  USER_AGENT = "Ruby / FeedParser gem"
9
9
 
10
10
  class FeedParser::UnknownFeedType < Exception ; end
11
+ class FeedParser::InvalidURI < Exception ; end
11
12
 
12
13
  def initialize(opts)
13
14
  @url = opts[:url]
14
- @http_options = opts[:http] || {}
15
+ @http_options = {"User-Agent" => FeedParser::USER_AGENT}.merge(opts[:http] || {})
15
16
  @@sanitizer = (opts[:sanitizer] || SelfSanitizer.new)
16
17
  @@fields_to_sanitize = (opts[:fields_to_sanitize] || [:content])
17
18
  self
@@ -26,7 +27,34 @@ class FeedParser
26
27
  end
27
28
 
28
29
  def parse
29
- @feed ||= Feed.new(@url, @http_options)
30
+ feed_xml = open_or_follow_redirect(@url)
31
+ @feed ||= Feed.new(feed_xml)
32
+ end
33
+
34
+ private
35
+
36
+ def open_or_follow_redirect(feed_url)
37
+ uri = URI.parse(feed_url)
38
+
39
+ if uri.userinfo
40
+ @http_options[:http_basic_authentication] = [uri.user, uri.password].compact
41
+ uri.userinfo = uri.user = uri.password = nil
42
+ end
43
+
44
+ @http_options[:redirect] = true if RUBY_VERSION >= '1.9'
45
+
46
+ if ['http', 'https'].include?(uri.scheme)
47
+ open(uri.to_s, @http_options)
48
+ else
49
+ raise FeedParser::InvalidURI.new("Only URIs with http or https protocol are supported")
50
+ end
51
+ rescue RuntimeError => ex
52
+ redirect_url = ex.to_s.split(" ").last
53
+ if URI.parse(feed_url).scheme == "http" && URI.parse(redirect_url).scheme == "https"
54
+ open_or_follow_redirect(redirect_url)
55
+ else
56
+ raise ex
57
+ end
30
58
  end
31
59
  end
32
60
 
@@ -2,10 +2,8 @@ class FeedParser
2
2
  class Feed
3
3
  attr_reader :type
4
4
 
5
- def initialize(feed_url, http_options = {})
6
- @http_options = http_options
7
- raw_feed = open_or_follow_redirect(feed_url)
8
- @feed = Nokogiri::XML(raw_feed)
5
+ def initialize(feed_xml)
6
+ @feed = Nokogiri::XML(feed_xml)
9
7
  @feed.remove_namespaces!
10
8
  @type = ((@feed.xpath('/rss')[0] && :rss) || (@feed.xpath('/feed')[0] && :atom))
11
9
  raise FeedParser::UnknownFeedType.new("Feed is not an RSS feed or an ATOM feed") unless @type
@@ -44,34 +42,5 @@ class FeedParser
44
42
  :items => items.map(&:as_json)
45
43
  }
46
44
  end
47
-
48
- private
49
-
50
- # Some feeds
51
- def open_or_follow_redirect(feed_url)
52
- parsed_url = URI.parse(feed_url)
53
-
54
- connection_options = {"User-Agent" => FeedParser::USER_AGENT}
55
- connection_options.merge!(@http_options)
56
- if parsed_url.userinfo
57
- connection_options[:http_basic_authentication] = [parsed_url.user, parsed_url.password].compact
58
- parsed_url.userinfo = parsed_url.user = parsed_url.password = nil
59
- end
60
-
61
- connection_options[:redirect] = true if RUBY_VERSION >= '1.9'
62
-
63
- if parsed_url.scheme
64
- open(parsed_url.to_s, connection_options)
65
- else
66
- open(parsed_url.to_s)
67
- end
68
- rescue RuntimeError => ex
69
- redirect_url = ex.to_s.split(" ").last
70
- if URI.split(feed_url).first == "http" && URI.split(redirect_url).first == "https"
71
- open_or_follow_redirect(redirect_url)
72
- else
73
- raise ex
74
- end
75
- end
76
45
  end
77
46
  end
@@ -20,96 +20,104 @@ describe FeedParser do
20
20
 
21
21
  describe "#new" do
22
22
  it "should forward given http options to the OpenURI" do
23
- FeedParser::Feed.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE)).and_return(feed_xml)
23
+ FeedParser.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE)).and_return(feed_xml)
24
24
  fp = FeedParser.new(:url => "http://blog.example.com/feed/", :http => {:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE})
25
25
  fp.parse
26
26
  end
27
- end
28
27
 
29
- describe FeedParser::Feed, "#new" do
30
28
  it "should fetch a feed by url" do
31
- FeedParser::Feed.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options).and_return(feed_xml)
32
- FeedParser::Feed.new("http://blog.example.com/feed/")
29
+ FeedParser.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options).and_return(feed_xml)
30
+ fp = FeedParser.new({:url => "http://blog.example.com/feed/"}.merge(http_connection_options))
31
+ fp.parse
33
32
  end
34
33
 
35
34
  it "should fetch a feed using basic auth if auth embedded to the url" do
36
- FeedParser::Feed.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:http_basic_authentication => ["user", "pass"])).and_return(feed_xml)
37
- FeedParser::Feed.new("http://user:pass@blog.example.com/feed/")
35
+ FeedParser.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:http_basic_authentication => ["user", "pass"])).and_return(feed_xml)
36
+ fp = FeedParser.new({:url => "http://user:pass@blog.example.com/feed/"}.merge(http_connection_options))
37
+ fp.parse
38
38
  end
39
39
 
40
40
  it "should fetch a feed with only a user name embedded to the url" do
41
- FeedParser::Feed.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:http_basic_authentication => ["user"])).and_return(feed_xml)
42
- FeedParser::Feed.new("http://user@blog.example.com/feed/")
41
+ FeedParser.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:http_basic_authentication => ["user"])).and_return(feed_xml)
42
+ fp = FeedParser.new({:url => "http://user@blog.example.com/feed/"}.merge(http_connection_options))
43
+ fp.parse
43
44
  end
44
45
 
45
- it "should follow redirect based on the exception message" do
46
- FeedParser::Feed.any_instance.should_receive(:open).with("http://example.com/feed", http_connection_options).and_raise(RuntimeError.new("redirection forbidden: http://example.com/feed -> https://example.com/feed"))
47
- FeedParser::Feed.any_instance.should_receive(:open).with("https://example.com/feed", http_connection_options).and_return(feed_xml)
48
- FeedParser::Feed.new("http://example.com/feed")
46
+ it "should follow redirect based on the exception message (even if OpenURI don't want to do it)" do
47
+ FeedParser.any_instance.should_receive(:open).with("http://example.com/feed", http_connection_options).and_raise(RuntimeError.new("redirection forbidden: http://example.com/feed -> https://example.com/feed"))
48
+ FeedParser.any_instance.should_receive(:open).with("https://example.com/feed", http_connection_options).and_return(feed_xml)
49
+ fp = FeedParser.new({:url => "http://example.com/feed"}.merge(http_connection_options))
50
+ fp.parse
49
51
  end
50
52
 
51
- it "should not follow redirect from secure connection to non-secure one" do
52
- FeedParser::Feed.any_instance.should_receive(:open).with("https://example.com/feed", http_connection_options).and_raise(RuntimeError.new("redirection forbidden: https://example.com/feed -> http://example.com/feed"))
53
- FeedParser::Feed.any_instance.should_not_receive(:open).with("http://example.com/feed", http_connection_options)
53
+ it "should not follow redirect from a secure connection to a non-secure one" do
54
+ FeedParser.any_instance.should_receive(:open).with("https://example.com/feed", http_connection_options).and_raise(RuntimeError.new("redirection forbidden: https://example.com/feed -> http://example.com/feed"))
55
+ FeedParser.any_instance.should_not_receive(:open).with("http://example.com/feed", http_connection_options)
54
56
  lambda {
55
- FeedParser::Feed.new("https://example.com/feed")
57
+ fp = FeedParser.new({:url => "https://example.com/feed"}.merge(http_connection_options))
58
+ fp.parse
56
59
  }.should raise_error(RuntimeError, "redirection forbidden: https://example.com/feed -> http://example.com/feed")
57
60
  end
58
61
 
59
- it "should use alternate url if there is no valid self url in the received feed xml" do
60
- FeedParser::Feed.any_instance.should_receive(:open).with("https://developers.facebook.com/blog/feed", http_connection_options).and_return(feed_xml('facebook.atom.xml'))
61
- lambda {
62
- feed = FeedParser::Feed.new("https://developers.facebook.com/blog/feed")
63
- feed.url.should == "https://developers.facebook.com/blog/feed"
64
- }.should_not raise_error
65
- end
66
-
67
62
  it "should raise an error unless retrieved XML is not an RSS or an ATOM feed" do
68
- FeedParser::Feed.any_instance.should_receive(:open).with("http://example.com/blog/feed/invalid.xml", http_connection_options).and_return("foo bar")
63
+ FeedParser.any_instance.should_receive(:open).with("http://example.com/blog/feed/invalid.xml", http_connection_options).and_return("foo bar")
69
64
  lambda {
70
- FeedParser::Feed.new("http://example.com/blog/feed/invalid.xml")
65
+ fp = FeedParser.new({:url => "http://example.com/blog/feed/invalid.xml"}.merge(http_connection_options))
66
+ fp.parse
71
67
  }.should raise_error(FeedParser::UnknownFeedType, "Feed is not an RSS feed or an ATOM feed")
72
68
  end
73
- end
74
69
 
75
- describe "#parse" do
76
- shared_examples_for "feed parser" do
77
- it "should not fail" do
78
- lambda {
79
- @feed = @feed_parser.parse
80
- }.should_not raise_error
81
- end
82
-
83
- it "should populate every item" do
84
- @feed = @feed_parser.parse
85
- @feed.items.each do |item|
86
- [:guid, :link, :title, :categories, :author, :content].each do |attribute|
87
- item.send(attribute).should_not be_nil
88
- item.send(attribute).should_not be_empty
89
- end
90
- end
91
- end
70
+ it "should not allow feeds without http(s) protocol" do
71
+ lambda {
72
+ fp = FeedParser.new({:url => "feed://example.com/feed"}.merge(http_connection_options))
73
+ fp.parse
74
+ }.should raise_error(FeedParser::InvalidURI, "Only URIs with http or https protocol are supported")
92
75
  end
76
+ end
93
77
 
94
- def case_tester(test_cases)
78
+ describe "::Feed" do
79
+ def case_tester(feed, test_cases)
95
80
  test_cases.each do |test_case|
96
81
  if test_case.last.is_a?(Array)
97
82
  test_case.last.each do |_case|
98
- @feed.as_json[test_case.first].should include(_case)
83
+ feed.as_json[test_case.first].should include(_case)
99
84
  end
100
85
  else
101
- @feed.send(test_case.first).should include(test_case.last)
86
+ feed.send(test_case.first).should include(test_case.last)
102
87
  end
103
88
  end
104
89
  end
105
90
 
91
+ describe "sanitizer" do
92
+ it "should sanitize with custom sanitizer" do
93
+ FeedParser.new(:url => "https://example.com/feed", :sanitizer => NotSaneSanitizer.new)
94
+
95
+ feed = FeedParser::Feed.new(feed_xml('sanitize.me.rss.xml'))
96
+ feed.items.first.content.should_not =~ (/flowdock/i)
97
+ end
98
+
99
+ it "should sanitize custom fields" do
100
+ FeedParser.new(:url => "https://example.com/feed", :sanitizer => NotSaneSanitizer.new, :fields_to_sanitize => [:author, :content])
101
+
102
+ feed = FeedParser::Feed.new(feed_xml('sanitize.me.rss.xml'))
103
+ feed.items.first.author.should == 'Sanitized'
104
+ end
105
+ end
106
+
106
107
  describe "rss feeds" do
107
- before :each do
108
- @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', 'nodeta.rss.xml'))
108
+ it "should be an rss feed" do
109
+ feed = FeedParser::Feed.new(feed_xml('nodeta.rss.xml'))
110
+ feed.type.should == :rss
109
111
  end
110
112
 
111
- after :each do
112
- @feed.type.should == :rss
113
+ it "should populate every item" do
114
+ feed = FeedParser::Feed.new(feed_xml('nodeta.rss.xml'))
115
+ feed.items.each do |item|
116
+ [:guid, :link, :title, :categories, :author, :content].each do |attribute|
117
+ item.send(attribute).should_not be_nil
118
+ item.send(attribute).should_not be_empty
119
+ end
120
+ end
113
121
  end
114
122
 
115
123
  {
@@ -165,40 +173,27 @@ describe FeedParser do
165
173
  },
166
174
  }.each do |rss_fixture, test_cases|
167
175
  it "should parse #{rss_fixture}" do
168
- @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', rss_fixture))
169
-
170
- @feed = @feed_parser.parse
176
+ feed = FeedParser::Feed.new(feed_xml(rss_fixture))
171
177
 
172
- case_tester(test_cases)
178
+ case_tester(feed, test_cases)
173
179
  end
174
180
  end
175
-
176
- it "should sanitize with custom sanitizer" do
177
- @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', 'sanitize.me.rss.xml'), :sanitizer => NotSaneSanitizer.new)
178
-
179
- @feed = @feed_parser.parse
180
-
181
- @feed.items.first.content.should_not =~ (/flowdock/i)
182
- end
183
-
184
- it "should sanitize custom fields" do
185
- @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', 'sanitize.me.rss.xml'), :sanitizer => NotSaneSanitizer.new, :fields_to_sanitize => [:author, :content])
186
-
187
- @feed = @feed_parser.parse
188
-
189
- @feed.items.first.author.should == 'Sanitized'
190
- end
191
-
192
- it_should_behave_like "feed parser"
193
181
  end
194
182
 
195
183
  describe "atom feeds" do
196
- before :each do
197
- @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', 'smashingmagazine.atom.xml'))
184
+ it "should be an atom feed" do
185
+ feed = FeedParser::Feed.new(feed_xml('smashingmagazine.atom.xml'))
186
+ feed.type.should == :atom
198
187
  end
199
188
 
200
- after :each do
201
- @feed.type.should == :atom
189
+ it "should populate every item" do
190
+ feed = FeedParser::Feed.new(feed_xml('smashingmagazine.atom.xml'))
191
+ feed.items.each do |item|
192
+ [:guid, :link, :title, :categories, :author, :content].each do |attribute|
193
+ item.send(attribute).should_not be_nil
194
+ item.send(attribute).should_not be_empty
195
+ end
196
+ end
202
197
  end
203
198
 
204
199
  {
@@ -230,15 +225,18 @@ describe FeedParser do
230
225
  }
231
226
  }.each do |atom_fixture, test_cases|
232
227
  it "should parse #{atom_fixture}" do
233
- @feed_parser = FeedParser.new(:url => File.join(File.dirname(__FILE__), 'fixtures', atom_fixture))
234
-
235
- @feed = @feed_parser.parse
228
+ feed = FeedParser::Feed.new(feed_xml(atom_fixture))
236
229
 
237
- case_tester(test_cases)
230
+ case_tester(feed, test_cases)
238
231
  end
239
232
  end
240
233
 
241
- it_should_behave_like "feed parser"
234
+ it "should use alternate url if there is no valid self url in the received feed xml" do
235
+ lambda {
236
+ feed = FeedParser::Feed.new(feed_xml('facebook.atom.xml'))
237
+ feed.url.should == "https://developers.facebook.com/blog/feed"
238
+ }.should_not raise_error
239
+ end
242
240
  end
243
241
  end
244
242
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feed_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2012-06-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &2153328640 !ruby/object:Gem::Requirement
16
+ requirement: &2153222140 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,18 +21,29 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2153328640
24
+ version_requirements: *2153222140
25
25
  - !ruby/object:Gem::Dependency
26
- name: rspec-rails
27
- requirement: &2153383440 !ruby/object:Gem::Requirement
26
+ name: rake
27
+ requirement: &2153221640 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
- - - ~>
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0.9'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *2153221640
36
+ - !ruby/object:Gem::Dependency
37
+ name: rspec
38
+ requirement: &2153221140 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
31
42
  - !ruby/object:Gem::Version
32
- version: '2.6'
43
+ version: '2.10'
33
44
  type: :development
34
45
  prerelease: false
35
- version_requirements: *2153383440
46
+ version_requirements: *2153221140
36
47
  description: Rss and Atom feed parser with sanitizer support built on top of Nokogiri.
37
48
  email:
38
49
  - arttu.tervo@gmail.com
@@ -62,7 +73,7 @@ files:
62
73
  - spec/fixtures/sanitize.me.rss.xml
63
74
  - spec/fixtures/scrumalliance.rss.xml
64
75
  - spec/fixtures/smashingmagazine.atom.xml
65
- homepage: http://github.com/arttu/feed_parser
76
+ homepage: https://github.com/arttu/feed_parser
66
77
  licenses: []
67
78
  post_install_message:
68
79
  rdoc_options: []