feed_parser 0.3.4 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,3 @@
1
1
  rvm:
2
- - 1.8.7
3
2
  - 1.9.3
3
+ - 2.0.0
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
1
- source :rubygems
1
+ source 'http://rubygems.org'
2
2
 
3
3
  gem 'nokogiri'
4
4
 
@@ -6,7 +6,3 @@ group :test do
6
6
  gem 'rake'
7
7
  gem 'rspec'
8
8
  end
9
-
10
- platform :jruby do
11
- gem 'jruby-openssl'
12
- end
data/README.md CHANGED
@@ -6,7 +6,8 @@ Rss and Atom feed parser built on top of Nokogiri. Supports custom sanitizers.
6
6
 
7
7
  [![Build Status](https://secure.travis-ci.org/arttu/feed_parser.png)](http://travis-ci.org/arttu/feed_parser)
8
8
 
9
- FeedParser gem is tested on Ruby 1.8.7 and 1.9.3.
9
+ FeedParser gem is tested on Ruby 1.9.3 and 2.0.0.
10
+ Ruby 1.8.7 should still work with Nokogiri < 1.6.0.
10
11
 
11
12
  ## Install
12
13
 
@@ -16,17 +17,28 @@ Add to Gemfile
16
17
 
17
18
  ## Usage
18
19
 
19
- # the most basic use case
20
+ #### Parse from URL
21
+
20
22
  fp = FeedParser.new(:url => "http://example.com/feed/")
21
- # with sanitizer
23
+ feed = fp.parse
24
+
25
+ Optionally pass HTTP options; see the OpenURI documentation for the available options: http://apidock.com/ruby/OpenURI
26
+
27
+ fp = FeedParser.new(:url => "http://example.com/feed/", :http => {:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE})
28
+
29
+ #### Parse from an XML string
30
+
31
+ fp = FeedParser.new(:feed_xml => "<rss>...</rss>")
32
+ feed = fp.parse
33
+
34
+ #### Use sanitizer
35
+
22
36
  fp = FeedParser.new(:url => "http://example.com/feed/", :sanitizer => MyBestestSanitizer.new)
23
37
  # sanitizing custom field set
24
38
  fp = FeedParser.new(:url => "http://example.com/feed/", :sanitizer => MyBestestSanitizer.new, :fields_to_sanitize => [:title, :content])
25
-
26
- # retrieve the feed xml and parse it
27
- feed = fp.parse
28
-
29
- # using parsed feed in your code
39
+
40
+ #### Using parsed feed in your code
41
+
30
42
  feed.as_json
31
43
  # => {:title => "Feed title", :url => "http://example.com/feed/", :items => [{:guid => , :title => , :author => ...}]}
32
44
 
@@ -34,11 +46,7 @@ Add to Gemfile
34
46
  pp feed_item
35
47
  end
36
48
 
37
- # you can also pass http options to be used for the connection
38
- # for available options, check out the OpenURI documentation: http://apidock.com/ruby/OpenURI
39
- fp = FeedParser.new(:url => "http://example.com/feed/", :http => {:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE})
40
-
41
- If the fetched XML is not a valid RSS or an ATOM feed, a FeedParser::UnknownFeedType is raised in FeedParser#parse.
49
+ If the XML is not a valid RSS or Atom feed, FeedParser#parse raises FeedParser::UnknownFeedType.
42
50
 
43
51
  ## Running tests
44
52
 
data/Rakefile CHANGED
@@ -18,18 +18,3 @@ end
18
18
 
19
19
  desc "Default: Run specs"
20
20
  task :default => :spec
21
-
22
- namespace :rubies do
23
- rvm_rubies_command = "rvm 1.8.7-p302@feed_parser,1.9.3-p194@feed_parser do"
24
-
25
- desc "Update dependencies for all Ruby versions"
26
- task :update_dependencies do
27
- system("#{rvm_rubies_command} bundle install")
28
- system("#{rvm_rubies_command} bundle update")
29
- end
30
-
31
- desc "Run tests with Ruby versions 1.8.7 and 1.9.3"
32
- task :spec do
33
- system("#{rvm_rubies_command} bundle exec rake spec")
34
- end
35
- end
@@ -1,12 +1,9 @@
1
1
  # feed_parser.gemspec
2
2
  # -*- encoding: utf-8 -*-
3
3
 
4
- $:.push File.expand_path("../lib", __FILE__)
5
- require 'feed_parser'
6
-
7
4
  Gem::Specification.new do |s|
8
5
  s.name = 'feed_parser'
9
- s.version = FeedParser::VERSION
6
+ s.version = "0.3.5"
10
7
  s.authors = ['Arttu Tervo']
11
8
  s.email = ['arttu.tervo@gmail.com']
12
9
  s.homepage = 'https://github.com/arttu/feed_parser'
@@ -3,18 +3,22 @@ require 'nokogiri'
3
3
 
4
4
  class FeedParser
5
5
 
6
- VERSION = "0.3.4"
7
-
8
6
  USER_AGENT = "Ruby / FeedParser gem"
9
7
 
10
8
  class FeedParser::UnknownFeedType < Exception ; end
11
9
  class FeedParser::InvalidURI < Exception ; end
12
10
 
11
+ def self.parse(opts)
12
+ fp = FeedParser.new(opts)
13
+ fp.parse
14
+ end
15
+
13
16
  def initialize(opts)
14
17
  @url = opts[:url]
15
18
  @http_options = {"User-Agent" => FeedParser::USER_AGENT}.merge(opts[:http] || {})
16
19
  @@sanitizer = (opts[:sanitizer] || SelfSanitizer.new)
17
20
  @@fields_to_sanitize = (opts[:fields_to_sanitize] || [:content])
21
+ @feed_xml = opts[:feed_xml]
18
22
  self
19
23
  end
20
24
 
@@ -27,7 +31,11 @@ class FeedParser
27
31
  end
28
32
 
29
33
  def parse
30
- feed_xml = open_or_follow_redirect(@url)
34
+ if @feed_xml
35
+ feed_xml = @feed_xml
36
+ else
37
+ feed_xml = open_or_follow_redirect(@url)
38
+ end
31
39
  @feed ||= Feed.new(feed_xml)
32
40
  feed_xml.close! if feed_xml.class.to_s == 'Tempfile'
33
41
  @feed
@@ -11,6 +11,7 @@ class FeedParser
11
11
  :item_guid => "guid",
12
12
  :item_link => "link",
13
13
  :item_title => "title",
14
+ :item_published => "pubDate",
14
15
  :item_categories => "category",
15
16
  :item_author => "creator",
16
17
  :item_description => "description",
@@ -26,6 +27,8 @@ class FeedParser
26
27
  :item_guid => "id",
27
28
  :item_link => "link",
28
29
  :item_title => "title",
30
+ :item_published => "published",
31
+ :item_updated => "updated",
29
32
  :item_categories => "category",
30
33
  :item_author => "author/name",
31
34
  :item_description => "summary",
@@ -7,6 +7,7 @@ class FeedParser
7
7
  def initialize(item)
8
8
  @guid = item.xpath(Dsl[@type][:item_guid]).text
9
9
  @title = item.xpath(Dsl[@type][:item_title]).text
10
+ @published = parse_datetime(item.xpath(Dsl[@type][:item_published]).text)
10
11
  @author = item.xpath(Dsl[@type][:item_author]).text
11
12
  @description = possible_html_content(item.xpath(Dsl[@type][:item_description]))
12
13
  @content = possible_html_content(item.xpath(Dsl[@type][:item_content]))
@@ -27,13 +28,14 @@ class FeedParser
27
28
 
28
29
  def as_json
29
30
  {
30
- :guid => guid,
31
- :link => link,
32
- :title => title,
33
- :categories => categories,
34
- :author => author,
35
- :description => description,
36
- :content => content
31
+ :guid => self.guid,
32
+ :link => self.link,
33
+ :title => self.title,
34
+ :published => self.published,
35
+ :categories => self.categories,
36
+ :author => self.author,
37
+ :description => self.description,
38
+ :content => self.content
37
39
  }
38
40
  end
39
41
 
@@ -51,6 +53,15 @@ class FeedParser
51
53
  element.text
52
54
  end
53
55
  end
56
+
57
+ def parse_datetime(string)
58
+ begin
59
+ DateTime.parse(string) unless string.empty?
60
+ rescue
61
+ warn "Failed to parse date #{string.inspect}"
62
+ nil
63
+ end
64
+ end
54
65
  end
55
66
 
56
67
  class RssItem < FeedItem
@@ -67,7 +78,12 @@ class FeedParser
67
78
  @type = :atom
68
79
  super
69
80
  @link = item.xpath(Dsl[@type][:item_link]).attribute("href").text.strip
81
+ @updated = parse_datetime(item.xpath(Dsl[@type][:item_updated]).text)
70
82
  @categories = item.xpath(Dsl[@type][:item_categories]).map{|cat| cat.attribute("term").text}
71
83
  end
84
+
85
+ def published
86
+ @published ||= @updated
87
+ end
72
88
  end
73
89
  end
@@ -18,6 +18,19 @@ describe FeedParser do
18
18
  opts
19
19
  end
20
20
 
21
+ describe ".parse" do
22
+ it "should instantiate a new FeedParser and return a parsed feed" do
23
+ feed = FeedParser::Feed.new(feed_xml)
24
+
25
+ fp = FeedParser.new(:url => "http://blog.example.com/feed/")
26
+ fp.should_receive(:parse).and_return(feed)
27
+
28
+ FeedParser.should_receive(:new).with(:url => "http://blog.example.com/feed/").and_return(fp)
29
+
30
+ FeedParser.parse(:url => "http://blog.example.com/feed/").should == feed
31
+ end
32
+ end
33
+
21
34
  describe "#new" do
22
35
  it "should forward given http options to the OpenURI" do
23
36
  FeedParser.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE)).and_return(feed_xml)
@@ -73,6 +86,11 @@ describe FeedParser do
73
86
  fp.parse
74
87
  }.should raise_error(FeedParser::InvalidURI, "Only URIs with http or https protocol are supported")
75
88
  end
89
+
90
+ it "should parse feeds from the :feed_xml option instead of the :url" do
91
+ fp = FeedParser.new(:feed_xml => "<rss><channel><item><title>feed_xml test</title><link>http://example.com</link></item></channel></rss>")
92
+ fp.parse.items.first.title == "feed_xml test"
93
+ end
76
94
  end
77
95
 
78
96
  describe "::Feed" do
@@ -120,6 +138,12 @@ describe FeedParser do
120
138
  end
121
139
  end
122
140
 
141
+ it "should set the published date" do
142
+ feed = FeedParser::Feed.new(feed_xml('nodeta.rss.xml'))
143
+ item = feed.items.first
144
+ item.published.should == DateTime.parse("Jul 5, 2009 09:25:32 GMT")
145
+ end
146
+
123
147
  {
124
148
  'nodeta.rss.xml' => {
125
149
  :title => "Nodeta",
@@ -129,6 +153,7 @@ describe FeedParser do
129
153
  :guid => "http://blog.nodeta.fi/?p=73",
130
154
  :link => "http://blog.nodeta.fi/2009/01/16/ruby-187-imported/",
131
155
  :title => "Ruby 1.8.7 imported",
156
+ :published => DateTime.parse("Jan 16, 2009 15:29:52 GMT"),
132
157
  :categories => ["APIdock", "Ruby"],
133
158
  :author => "Otto Hilska",
134
159
  :description => "I just finished importing Ruby 1.8.7 to APIdock. It&#8217;s also the new default version, because usually it is better documented. However, there&#8217;re some incompatibilities between 1.8.6 and 1.8.7, so be sure to check the older documentation when something seems to be wrong.\n",
@@ -144,6 +169,7 @@ describe FeedParser do
144
169
  :guid => "basecamp.00000000.Comment.1234567",
145
170
  :link => "https://awesome.basecamphq.com/unique_item_link",
146
171
  :title => "Comment posted: Re: Howdy how?",
172
+ :published => DateTime.parse("Nov 9, 2011 20:35:18 GMT"),
147
173
  :categories => [],
148
174
  :author => "Ffuuuuuuu- Le.",
149
175
  :description => "<div>trololooo</div><p>Company: awesome | Project: Awesome project</p>",
@@ -159,6 +185,7 @@ describe FeedParser do
159
185
  :guid => "http://scrumalliance.org/articles/424-testing-in-scrum-with-a-waterfall-interaction",
160
186
  :link => "http://scrumalliance.org/articles/424-testing-in-scrum-with-a-waterfall-interaction", # trims the link
161
187
  :title => "Testing in Scrum with a Waterfall Interaction",
188
+ :published => DateTime.parse("May 23, 2012 11:07:03 GMT"),
162
189
  :categories => [],
163
190
  :author => "",
164
191
  :description => "Sometimes, when testing user stories in Scrum, there's a final Waterfall interaction to deal with. The scenario I present here is based on this situation: a Scrum process with an interaction of sequential phases at the end of the process to (re)test the whole developed functionality. These sequential phases are mandatory for our organization, which follows a Waterfall process for the releases of the product. So, for the moment at least, we have to deal with this and my experience is that we aren't alone.",
@@ -196,6 +223,18 @@ describe FeedParser do
196
223
  end
197
224
  end
198
225
 
226
+ it "should set the published date if present" do
227
+ feed = FeedParser::Feed.new(feed_xml('smashingmagazine.atom.xml'))
228
+ item = feed.items.first
229
+ item.published.should == DateTime.parse("Jul 20, 2009 8:43:22 GMT")
230
+ end
231
+
232
+ it "should default the published date to the updated date if not present" do
233
+ feed = FeedParser::Feed.new(feed_xml('facebook.atom.xml'))
234
+ item = feed.items.first
235
+ item.published.should == DateTime.parse("Dec 30, 2011 17:00 GMT")
236
+ end
237
+
199
238
  {
200
239
  'gcal.atom.xml' => {
201
240
  :title => "dokaus.net",
@@ -213,6 +252,7 @@ describe FeedParser do
213
252
  :guid => "urn:uuid:132266233552163",
214
253
  :link => "http://developers.facebook.com/blog/post/614/",
215
254
  :title => "Breaking Change: JavaScript SDK to oauth:true on December 13th",
255
+ :published => DateTime.parse("Dec 12, 2011 17:00 GMT"),
216
256
  :categories=>[],
217
257
  :author => "",
218
258
  :description => "",
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feed_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.3.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-06-29 00:00:00.000000000 Z
12
+ date: 2013-07-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &2157337400 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,15 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2157337400
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
25
30
  - !ruby/object:Gem::Dependency
26
31
  name: rake
27
- requirement: &2157336900 !ruby/object:Gem::Requirement
32
+ requirement: !ruby/object:Gem::Requirement
28
33
  none: false
29
34
  requirements:
30
35
  - - ! '>='
@@ -32,10 +37,15 @@ dependencies:
32
37
  version: '0.9'
33
38
  type: :development
34
39
  prerelease: false
35
- version_requirements: *2157336900
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0.9'
36
46
  - !ruby/object:Gem::Dependency
37
47
  name: rspec
38
- requirement: &2157336400 !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
39
49
  none: false
40
50
  requirements:
41
51
  - - ! '>='
@@ -43,7 +53,12 @@ dependencies:
43
53
  version: '2.10'
44
54
  type: :development
45
55
  prerelease: false
46
- version_requirements: *2157336400
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '2.10'
47
62
  description: Rss and Atom feed parser with sanitizer support built on top of Nokogiri.
48
63
  email:
49
64
  - arttu.tervo@gmail.com
@@ -93,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
93
108
  version: '0'
94
109
  requirements: []
95
110
  rubyforge_project:
96
- rubygems_version: 1.8.10
111
+ rubygems_version: 1.8.24
97
112
  signing_key:
98
113
  specification_version: 3
99
114
  summary: Rss and Atom feed parser