feed_parser 0.3.4 → 0.3.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,3 @@
1
1
  rvm:
2
- - 1.8.7
3
2
  - 1.9.3
3
+ - 2.0.0
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
1
- source :rubygems
1
+ source 'http://rubygems.org'
2
2
 
3
3
  gem 'nokogiri'
4
4
 
@@ -6,7 +6,3 @@ group :test do
6
6
  gem 'rake'
7
7
  gem 'rspec'
8
8
  end
9
-
10
- platform :jruby do
11
- gem 'jruby-openssl'
12
- end
data/README.md CHANGED
@@ -6,7 +6,8 @@ Rss and Atom feed parser built on top of Nokogiri. Supports custom sanitizers.
6
6
 
7
7
  [![Build Status](https://secure.travis-ci.org/arttu/feed_parser.png)](http://travis-ci.org/arttu/feed_parser)
8
8
 
9
- FeedParser gem is tested on Ruby 1.8.7 and 1.9.3.
9
+ FeedParser gem is tested on Ruby 1.9.3 and 2.0.0.
10
+ 1.8.7 should work with Nokogiri < 1.6.0.
10
11
 
11
12
  ## Install
12
13
 
@@ -16,17 +17,28 @@ Add to Gemfile
16
17
 
17
18
  ## Usage
18
19
 
19
- # the most basic use case
20
+ #### Parse from URL
21
+
20
22
  fp = FeedParser.new(:url => "http://example.com/feed/")
21
- # with sanitizer
23
+ feed = fp.parse
24
+
25
+ Optionally pass HTTP options, see more from the OpenURI documentation: http://apidock.com/ruby/OpenURI
26
+
27
+ fp = FeedParser.new(:url => "http://example.com/feed/", :http => {:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE})
28
+
29
+ #### Parse from an XML string
30
+
31
+ fp = FeedParser.new(:feed_xml => "<rss>...</rss>")
32
+ feed = fp.parse
33
+
34
+ #### Use sanitizer
35
+
22
36
  fp = FeedParser.new(:url => "http://example.com/feed/", :sanitizer => MyBestestSanitizer.new)
23
37
  # sanitizing custom field set
24
38
  fp = FeedParser.new(:url => "http://example.com/feed/", :sanitizer => MyBestestSanitizer.new, :fields_to_sanitize => [:title, :content])
25
-
26
- # retrieve the feed xml and parse it
27
- feed = fp.parse
28
-
29
- # using parsed feed in your code
39
+
40
+ #### Using parsed feed in your code
41
+
30
42
  feed.as_json
31
43
  # => {:title => "Feed title", :url => "http://example.com/feed/", :items => [{:guid => , :title => , :author => ...}]}
32
44
 
@@ -34,11 +46,7 @@ Add to Gemfile
34
46
  pp feed_item
35
47
  end
36
48
 
37
- # you can also pass http options to be used for the connection
38
- # for available options, check out the OpenURI documentation: http://apidock.com/ruby/OpenURI
39
- fp = FeedParser.new(:url => "http://example.com/feed/", :http => {:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE})
40
-
41
- If the fetched XML is not a valid RSS or an ATOM feed, a FeedParser::UnknownFeedType is raised in FeedParser#parse.
49
+ If the XML is not a valid RSS or an ATOM feed, a FeedParser::UnknownFeedType is raised in FeedParser#parse.
42
50
 
43
51
  ## Running tests
44
52
 
data/Rakefile CHANGED
@@ -18,18 +18,3 @@ end
18
18
 
19
19
  desc "Default: Run specs"
20
20
  task :default => :spec
21
-
22
- namespace :rubies do
23
- rvm_rubies_command = "rvm 1.8.7-p302@feed_parser,1.9.3-p194@feed_parser do"
24
-
25
- desc "Update dependencies for all Ruby versions"
26
- task :update_dependencies do
27
- system("#{rvm_rubies_command} bundle install")
28
- system("#{rvm_rubies_command} bundle update")
29
- end
30
-
31
- desc "Run tests with Ruby versions 1.8.7 and 1.9.3"
32
- task :spec do
33
- system("#{rvm_rubies_command} bundle exec rake spec")
34
- end
35
- end
@@ -1,12 +1,9 @@
1
1
  # feed_parser.gemspec
2
2
  # -*- encoding: utf-8 -*-
3
3
 
4
- $:.push File.expand_path("../lib", __FILE__)
5
- require 'feed_parser'
6
-
7
4
  Gem::Specification.new do |s|
8
5
  s.name = 'feed_parser'
9
- s.version = FeedParser::VERSION
6
+ s.version = "0.3.5"
10
7
  s.authors = ['Arttu Tervo']
11
8
  s.email = ['arttu.tervo@gmail.com']
12
9
  s.homepage = 'https://github.com/arttu/feed_parser'
@@ -3,18 +3,22 @@ require 'nokogiri'
3
3
 
4
4
  class FeedParser
5
5
 
6
- VERSION = "0.3.4"
7
-
8
6
  USER_AGENT = "Ruby / FeedParser gem"
9
7
 
10
8
  class FeedParser::UnknownFeedType < Exception ; end
11
9
  class FeedParser::InvalidURI < Exception ; end
12
10
 
11
+ def self.parse(opts)
12
+ fp = FeedParser.new(opts)
13
+ fp.parse
14
+ end
15
+
13
16
  def initialize(opts)
14
17
  @url = opts[:url]
15
18
  @http_options = {"User-Agent" => FeedParser::USER_AGENT}.merge(opts[:http] || {})
16
19
  @@sanitizer = (opts[:sanitizer] || SelfSanitizer.new)
17
20
  @@fields_to_sanitize = (opts[:fields_to_sanitize] || [:content])
21
+ @feed_xml = opts[:feed_xml]
18
22
  self
19
23
  end
20
24
 
@@ -27,7 +31,11 @@ class FeedParser
27
31
  end
28
32
 
29
33
  def parse
30
- feed_xml = open_or_follow_redirect(@url)
34
+ if @feed_xml
35
+ feed_xml = @feed_xml
36
+ else
37
+ feed_xml = open_or_follow_redirect(@url)
38
+ end
31
39
  @feed ||= Feed.new(feed_xml)
32
40
  feed_xml.close! if feed_xml.class.to_s == 'Tempfile'
33
41
  @feed
@@ -11,6 +11,7 @@ class FeedParser
11
11
  :item_guid => "guid",
12
12
  :item_link => "link",
13
13
  :item_title => "title",
14
+ :item_published => "pubDate",
14
15
  :item_categories => "category",
15
16
  :item_author => "creator",
16
17
  :item_description => "description",
@@ -26,6 +27,8 @@ class FeedParser
26
27
  :item_guid => "id",
27
28
  :item_link => "link",
28
29
  :item_title => "title",
30
+ :item_published => "published",
31
+ :item_updated => "updated",
29
32
  :item_categories => "category",
30
33
  :item_author => "author/name",
31
34
  :item_description => "summary",
@@ -7,6 +7,7 @@ class FeedParser
7
7
  def initialize(item)
8
8
  @guid = item.xpath(Dsl[@type][:item_guid]).text
9
9
  @title = item.xpath(Dsl[@type][:item_title]).text
10
+ @published = parse_datetime(item.xpath(Dsl[@type][:item_published]).text)
10
11
  @author = item.xpath(Dsl[@type][:item_author]).text
11
12
  @description = possible_html_content(item.xpath(Dsl[@type][:item_description]))
12
13
  @content = possible_html_content(item.xpath(Dsl[@type][:item_content]))
@@ -27,13 +28,14 @@ class FeedParser
27
28
 
28
29
  def as_json
29
30
  {
30
- :guid => guid,
31
- :link => link,
32
- :title => title,
33
- :categories => categories,
34
- :author => author,
35
- :description => description,
36
- :content => content
31
+ :guid => self.guid,
32
+ :link => self.link,
33
+ :title => self.title,
34
+ :published => self.published,
35
+ :categories => self.categories,
36
+ :author => self.author,
37
+ :description => self.description,
38
+ :content => self.content
37
39
  }
38
40
  end
39
41
 
@@ -51,6 +53,15 @@ class FeedParser
51
53
  element.text
52
54
  end
53
55
  end
56
+
57
+ def parse_datetime(string)
58
+ begin
59
+ DateTime.parse(string) unless string.empty?
60
+ rescue
61
+ warn "Failed to parse date #{string.inspect}"
62
+ nil
63
+ end
64
+ end
54
65
  end
55
66
 
56
67
  class RssItem < FeedItem
@@ -67,7 +78,12 @@ class FeedParser
67
78
  @type = :atom
68
79
  super
69
80
  @link = item.xpath(Dsl[@type][:item_link]).attribute("href").text.strip
81
+ @updated = parse_datetime(item.xpath(Dsl[@type][:item_updated]).text)
70
82
  @categories = item.xpath(Dsl[@type][:item_categories]).map{|cat| cat.attribute("term").text}
71
83
  end
84
+
85
+ def published
86
+ @published ||= @updated
87
+ end
72
88
  end
73
89
  end
@@ -18,6 +18,19 @@ describe FeedParser do
18
18
  opts
19
19
  end
20
20
 
21
+ describe ".parse" do
22
+ it "should instantiate a new FeedParser and return a parsed feed" do
23
+ feed = FeedParser::Feed.new(feed_xml)
24
+
25
+ fp = FeedParser.new(:url => "http://blog.example.com/feed/")
26
+ fp.should_receive(:parse).and_return(feed)
27
+
28
+ FeedParser.should_receive(:new).with(:url => "http://blog.example.com/feed/").and_return(fp)
29
+
30
+ FeedParser.parse(:url => "http://blog.example.com/feed/").should == feed
31
+ end
32
+ end
33
+
21
34
  describe "#new" do
22
35
  it "should forward given http options to the OpenURI" do
23
36
  FeedParser.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE)).and_return(feed_xml)
@@ -73,6 +86,11 @@ describe FeedParser do
73
86
  fp.parse
74
87
  }.should raise_error(FeedParser::InvalidURI, "Only URIs with http or https protocol are supported")
75
88
  end
89
+
90
+ it "should parse feeds from the :feed_xml option instead of the :url" do
91
+ fp = FeedParser.new(:feed_xml => "<rss><channel><item><title>feed_xml test</title><link>http://example.com</link></item></channel></rss>")
92
+ fp.parse.items.first.title == "feed_xml test"
93
+ end
76
94
  end
77
95
 
78
96
  describe "::Feed" do
@@ -120,6 +138,12 @@ describe FeedParser do
120
138
  end
121
139
  end
122
140
 
141
+ it "should set the published date" do
142
+ feed = FeedParser::Feed.new(feed_xml('nodeta.rss.xml'))
143
+ item = feed.items.first
144
+ item.published.should == DateTime.parse("Jul 5, 2009 09:25:32 GMT")
145
+ end
146
+
123
147
  {
124
148
  'nodeta.rss.xml' => {
125
149
  :title => "Nodeta",
@@ -129,6 +153,7 @@ describe FeedParser do
129
153
  :guid => "http://blog.nodeta.fi/?p=73",
130
154
  :link => "http://blog.nodeta.fi/2009/01/16/ruby-187-imported/",
131
155
  :title => "Ruby 1.8.7 imported",
156
+ :published => DateTime.parse("Jan 16, 2009 15:29:52 GMT"),
132
157
  :categories => ["APIdock", "Ruby"],
133
158
  :author => "Otto Hilska",
134
159
  :description => "I just finished importing Ruby 1.8.7 to APIdock. It&#8217;s also the new default version, because usually it is better documented. However, there&#8217;re some incompatibilities between 1.8.6 and 1.8.7, so be sure to check the older documentation when something seems to be wrong.\n",
@@ -144,6 +169,7 @@ describe FeedParser do
144
169
  :guid => "basecamp.00000000.Comment.1234567",
145
170
  :link => "https://awesome.basecamphq.com/unique_item_link",
146
171
  :title => "Comment posted: Re: Howdy how?",
172
+ :published => DateTime.parse("Nov 9, 2011 20:35:18 GMT"),
147
173
  :categories => [],
148
174
  :author => "Ffuuuuuuu- Le.",
149
175
  :description => "<div>trololooo</div><p>Company: awesome | Project: Awesome project</p>",
@@ -159,6 +185,7 @@ describe FeedParser do
159
185
  :guid => "http://scrumalliance.org/articles/424-testing-in-scrum-with-a-waterfall-interaction",
160
186
  :link => "http://scrumalliance.org/articles/424-testing-in-scrum-with-a-waterfall-interaction", # trims the link
161
187
  :title => "Testing in Scrum with a Waterfall Interaction",
188
+ :published => DateTime.parse("May 23, 2012 11:07:03 GMT"),
162
189
  :categories => [],
163
190
  :author => "",
164
191
  :description => "Sometimes, when testing user stories in Scrum, there's a final Waterfall interaction to deal with. The scenario I present here is based on this situation: a Scrum process with an interaction of sequential phases at the end of the process to (re)test the whole developed functionality. These sequential phases are mandatory for our organization, which follows a Waterfall process for the releases of the product. So, for the moment at least, we have to deal with this and my experience is that we aren't alone.",
@@ -196,6 +223,18 @@ describe FeedParser do
196
223
  end
197
224
  end
198
225
 
226
+ it "should set the published date if present" do
227
+ feed = FeedParser::Feed.new(feed_xml('smashingmagazine.atom.xml'))
228
+ item = feed.items.first
229
+ item.published.should == DateTime.parse("Jul 20, 2009 8:43:22 GMT")
230
+ end
231
+
232
+ it "should default the published date to the updated date if not present" do
233
+ feed = FeedParser::Feed.new(feed_xml('facebook.atom.xml'))
234
+ item = feed.items.first
235
+ item.published.should == DateTime.parse("Dec 30, 2011 17:00 GMT")
236
+ end
237
+
199
238
  {
200
239
  'gcal.atom.xml' => {
201
240
  :title => "dokaus.net",
@@ -213,6 +252,7 @@ describe FeedParser do
213
252
  :guid => "urn:uuid:132266233552163",
214
253
  :link => "http://developers.facebook.com/blog/post/614/",
215
254
  :title => "Breaking Change: JavaScript SDK to oauth:true on December 13th",
255
+ :published => DateTime.parse("Dec 12, 2011 17:00 GMT"),
216
256
  :categories=>[],
217
257
  :author => "",
218
258
  :description => "",
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feed_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.3.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-06-29 00:00:00.000000000 Z
12
+ date: 2013-07-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &2157337400 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,15 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2157337400
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
25
30
  - !ruby/object:Gem::Dependency
26
31
  name: rake
27
- requirement: &2157336900 !ruby/object:Gem::Requirement
32
+ requirement: !ruby/object:Gem::Requirement
28
33
  none: false
29
34
  requirements:
30
35
  - - ! '>='
@@ -32,10 +37,15 @@ dependencies:
32
37
  version: '0.9'
33
38
  type: :development
34
39
  prerelease: false
35
- version_requirements: *2157336900
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0.9'
36
46
  - !ruby/object:Gem::Dependency
37
47
  name: rspec
38
- requirement: &2157336400 !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
39
49
  none: false
40
50
  requirements:
41
51
  - - ! '>='
@@ -43,7 +53,12 @@ dependencies:
43
53
  version: '2.10'
44
54
  type: :development
45
55
  prerelease: false
46
- version_requirements: *2157336400
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '2.10'
47
62
  description: Rss and Atom feed parser with sanitizer support built on top of Nokogiri.
48
63
  email:
49
64
  - arttu.tervo@gmail.com
@@ -93,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
93
108
  version: '0'
94
109
  requirements: []
95
110
  rubyforge_project:
96
- rubygems_version: 1.8.10
111
+ rubygems_version: 1.8.24
97
112
  signing_key:
98
113
  specification_version: 3
99
114
  summary: Rss and Atom feed parser