feed_parser 0.3.4 → 0.3.5
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +1 -1
- data/Gemfile +1 -5
- data/README.md +21 -13
- data/Rakefile +0 -15
- data/feed_parser.gemspec +1 -4
- data/lib/feed_parser.rb +11 -3
- data/lib/feed_parser/dsl.rb +3 -0
- data/lib/feed_parser/feed_item.rb +23 -7
- data/spec/feed_parser_spec.rb +40 -0
- metadata +24 -9
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -6,7 +6,8 @@ Rss and Atom feed parser built on top of Nokogiri. Supports custom sanitizers.
|
|
6
6
|
|
7
7
|
[![Build Status](https://secure.travis-ci.org/arttu/feed_parser.png)](http://travis-ci.org/arttu/feed_parser)
|
8
8
|
|
9
|
-
FeedParser gem is tested on Ruby 1.8.7 and 1.9.3.
|
9
|
+
FeedParser gem is tested on Ruby 1.9.3 and 2.0.0.
|
10
|
+
1.8.7 should work with Nokogiri < 1.6.0.
|
10
11
|
|
11
12
|
## Install
|
12
13
|
|
@@ -16,17 +17,28 @@ Add to Gemfile
|
|
16
17
|
|
17
18
|
## Usage
|
18
19
|
|
19
|
-
|
20
|
+
#### Parse from URL
|
21
|
+
|
20
22
|
fp = FeedParser.new(:url => "http://example.com/feed/")
|
21
|
-
|
23
|
+
feed = fp.parse
|
24
|
+
|
25
|
+
Optionally pass HTTP options, see more from the OpenURI documentation: http://apidock.com/ruby/OpenURI
|
26
|
+
|
27
|
+
fp = FeedParser.new(:url => "http://example.com/feed/", :http => {:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE})
|
28
|
+
|
29
|
+
#### Parse from an XML string
|
30
|
+
|
31
|
+
fp = FeedParser.new(:feed_xml => "<rss>...</rss>")
|
32
|
+
feed = fp.parse
|
33
|
+
|
34
|
+
#### Use sanitizer
|
35
|
+
|
22
36
|
fp = FeedParser.new(:url => "http://example.com/feed/", :sanitizer => MyBestestSanitizer.new)
|
23
37
|
# sanitizing custom field set
|
24
38
|
fp = FeedParser.new(:url => "http://example.com/feed/", :sanitizer => MyBestestSanitizer.new, :fields_to_sanitize => [:title, :content])
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
# using parsed feed in your code
|
39
|
+
|
40
|
+
#### Using parsed feed in your code
|
41
|
+
|
30
42
|
feed.as_json
|
31
43
|
# => {:title => "Feed title", :url => "http://example.com/feed/", :items => [{:guid => , :title => , :author => ...}]}
|
32
44
|
|
@@ -34,11 +46,7 @@ Add to Gemfile
|
|
34
46
|
pp feed_item
|
35
47
|
end
|
36
48
|
|
37
|
-
|
38
|
-
# for available options, check out the OpenURI documentation: http://apidock.com/ruby/OpenURI
|
39
|
-
fp = FeedParser.new(:url => "http://example.com/feed/", :http => {:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE})
|
40
|
-
|
41
|
-
If the fetched XML is not a valid RSS or an ATOM feed, a FeedParser::UnknownFeedType is raised in FeedParser#parse.
|
49
|
+
If the XML is not a valid RSS or an ATOM feed, a FeedParser::UnknownFeedType is raised in FeedParser#parse.
|
42
50
|
|
43
51
|
## Running tests
|
44
52
|
|
data/Rakefile
CHANGED
@@ -18,18 +18,3 @@ end
|
|
18
18
|
|
19
19
|
desc "Default: Run specs"
|
20
20
|
task :default => :spec
|
21
|
-
|
22
|
-
namespace :rubies do
|
23
|
-
rvm_rubies_command = "rvm 1.8.7-p302@feed_parser,1.9.3-p194@feed_parser do"
|
24
|
-
|
25
|
-
desc "Update dependencies for all Ruby versions"
|
26
|
-
task :update_dependencies do
|
27
|
-
system("#{rvm_rubies_command} bundle install")
|
28
|
-
system("#{rvm_rubies_command} bundle update")
|
29
|
-
end
|
30
|
-
|
31
|
-
desc "Run tests with Ruby versions 1.8.7 and 1.9.3"
|
32
|
-
task :spec do
|
33
|
-
system("#{rvm_rubies_command} bundle exec rake spec")
|
34
|
-
end
|
35
|
-
end
|
data/feed_parser.gemspec
CHANGED
@@ -1,12 +1,9 @@
|
|
1
1
|
# feed_parser.gemspec
|
2
2
|
# -*- encoding: utf-8 -*-
|
3
3
|
|
4
|
-
$:.push File.expand_path("../lib", __FILE__)
|
5
|
-
require 'feed_parser'
|
6
|
-
|
7
4
|
Gem::Specification.new do |s|
|
8
5
|
s.name = 'feed_parser'
|
9
|
-
s.version = FeedParser::VERSION
|
6
|
+
s.version = "0.3.5"
|
10
7
|
s.authors = ['Arttu Tervo']
|
11
8
|
s.email = ['arttu.tervo@gmail.com']
|
12
9
|
s.homepage = 'https://github.com/arttu/feed_parser'
|
data/lib/feed_parser.rb
CHANGED
@@ -3,18 +3,22 @@ require 'nokogiri'
|
|
3
3
|
|
4
4
|
class FeedParser
|
5
5
|
|
6
|
-
VERSION = "0.3.4"
|
7
|
-
|
8
6
|
USER_AGENT = "Ruby / FeedParser gem"
|
9
7
|
|
10
8
|
class FeedParser::UnknownFeedType < Exception ; end
|
11
9
|
class FeedParser::InvalidURI < Exception ; end
|
12
10
|
|
11
|
+
def self.parse(opts)
|
12
|
+
fp = FeedParser.new(opts)
|
13
|
+
fp.parse
|
14
|
+
end
|
15
|
+
|
13
16
|
def initialize(opts)
|
14
17
|
@url = opts[:url]
|
15
18
|
@http_options = {"User-Agent" => FeedParser::USER_AGENT}.merge(opts[:http] || {})
|
16
19
|
@@sanitizer = (opts[:sanitizer] || SelfSanitizer.new)
|
17
20
|
@@fields_to_sanitize = (opts[:fields_to_sanitize] || [:content])
|
21
|
+
@feed_xml = opts[:feed_xml]
|
18
22
|
self
|
19
23
|
end
|
20
24
|
|
@@ -27,7 +31,11 @@ class FeedParser
|
|
27
31
|
end
|
28
32
|
|
29
33
|
def parse
|
30
|
-
|
34
|
+
if @feed_xml
|
35
|
+
feed_xml = @feed_xml
|
36
|
+
else
|
37
|
+
feed_xml = open_or_follow_redirect(@url)
|
38
|
+
end
|
31
39
|
@feed ||= Feed.new(feed_xml)
|
32
40
|
feed_xml.close! if feed_xml.class.to_s == 'Tempfile'
|
33
41
|
@feed
|
data/lib/feed_parser/dsl.rb
CHANGED
@@ -11,6 +11,7 @@ class FeedParser
|
|
11
11
|
:item_guid => "guid",
|
12
12
|
:item_link => "link",
|
13
13
|
:item_title => "title",
|
14
|
+
:item_published => "pubDate",
|
14
15
|
:item_categories => "category",
|
15
16
|
:item_author => "creator",
|
16
17
|
:item_description => "description",
|
@@ -26,6 +27,8 @@ class FeedParser
|
|
26
27
|
:item_guid => "id",
|
27
28
|
:item_link => "link",
|
28
29
|
:item_title => "title",
|
30
|
+
:item_published => "published",
|
31
|
+
:item_updated => "updated",
|
29
32
|
:item_categories => "category",
|
30
33
|
:item_author => "author/name",
|
31
34
|
:item_description => "summary",
|
@@ -7,6 +7,7 @@ class FeedParser
|
|
7
7
|
def initialize(item)
|
8
8
|
@guid = item.xpath(Dsl[@type][:item_guid]).text
|
9
9
|
@title = item.xpath(Dsl[@type][:item_title]).text
|
10
|
+
@published = parse_datetime(item.xpath(Dsl[@type][:item_published]).text)
|
10
11
|
@author = item.xpath(Dsl[@type][:item_author]).text
|
11
12
|
@description = possible_html_content(item.xpath(Dsl[@type][:item_description]))
|
12
13
|
@content = possible_html_content(item.xpath(Dsl[@type][:item_content]))
|
@@ -27,13 +28,14 @@ class FeedParser
|
|
27
28
|
|
28
29
|
def as_json
|
29
30
|
{
|
30
|
-
:guid => guid,
|
31
|
-
:link => link,
|
32
|
-
:title => title,
|
33
|
-
:categories => categories,
|
34
|
-
:author => author,
|
35
|
-
:description => description,
|
36
|
-
:content => content
|
31
|
+
:guid => self.guid,
|
32
|
+
:link => self.link,
|
33
|
+
:title => self.title,
|
34
|
+
:published => self.published,
|
35
|
+
:categories => self.categories,
|
36
|
+
:author => self.author,
|
37
|
+
:description => self.description,
|
38
|
+
:content => self.content
|
37
39
|
}
|
38
40
|
end
|
39
41
|
|
@@ -51,6 +53,15 @@ class FeedParser
|
|
51
53
|
element.text
|
52
54
|
end
|
53
55
|
end
|
56
|
+
|
57
|
+
def parse_datetime(string)
|
58
|
+
begin
|
59
|
+
DateTime.parse(string) unless string.empty?
|
60
|
+
rescue
|
61
|
+
warn "Failed to parse date #{string.inspect}"
|
62
|
+
nil
|
63
|
+
end
|
64
|
+
end
|
54
65
|
end
|
55
66
|
|
56
67
|
class RssItem < FeedItem
|
@@ -67,7 +78,12 @@ class FeedParser
|
|
67
78
|
@type = :atom
|
68
79
|
super
|
69
80
|
@link = item.xpath(Dsl[@type][:item_link]).attribute("href").text.strip
|
81
|
+
@updated = parse_datetime(item.xpath(Dsl[@type][:item_updated]).text)
|
70
82
|
@categories = item.xpath(Dsl[@type][:item_categories]).map{|cat| cat.attribute("term").text}
|
71
83
|
end
|
84
|
+
|
85
|
+
def published
|
86
|
+
@published ||= @updated
|
87
|
+
end
|
72
88
|
end
|
73
89
|
end
|
data/spec/feed_parser_spec.rb
CHANGED
@@ -18,6 +18,19 @@ describe FeedParser do
|
|
18
18
|
opts
|
19
19
|
end
|
20
20
|
|
21
|
+
describe ".parse" do
|
22
|
+
it "should instantiate a new FeedParser and return a parsed feed" do
|
23
|
+
feed = FeedParser::Feed.new(feed_xml)
|
24
|
+
|
25
|
+
fp = FeedParser.new(:url => "http://blog.example.com/feed/")
|
26
|
+
fp.should_receive(:parse).and_return(feed)
|
27
|
+
|
28
|
+
FeedParser.should_receive(:new).with(:url => "http://blog.example.com/feed/").and_return(fp)
|
29
|
+
|
30
|
+
FeedParser.parse(:url => "http://blog.example.com/feed/").should == feed
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
21
34
|
describe "#new" do
|
22
35
|
it "should forward given http options to the OpenURI" do
|
23
36
|
FeedParser.any_instance.should_receive(:open).with("http://blog.example.com/feed/", http_connection_options.merge(:ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE)).and_return(feed_xml)
|
@@ -73,6 +86,11 @@ describe FeedParser do
|
|
73
86
|
fp.parse
|
74
87
|
}.should raise_error(FeedParser::InvalidURI, "Only URIs with http or https protocol are supported")
|
75
88
|
end
|
89
|
+
|
90
|
+
it "should parse feeds from the :feed_xml option instead of the :url" do
|
91
|
+
fp = FeedParser.new(:feed_xml => "<rss><channel><item><title>feed_xml test</title><link>http://example.com</link></item></channel></rss>")
|
92
|
+
fp.parse.items.first.title == "feed_xml test"
|
93
|
+
end
|
76
94
|
end
|
77
95
|
|
78
96
|
describe "::Feed" do
|
@@ -120,6 +138,12 @@ describe FeedParser do
|
|
120
138
|
end
|
121
139
|
end
|
122
140
|
|
141
|
+
it "should set the published date" do
|
142
|
+
feed = FeedParser::Feed.new(feed_xml('nodeta.rss.xml'))
|
143
|
+
item = feed.items.first
|
144
|
+
item.published.should == DateTime.parse("Jul 5, 2009 09:25:32 GMT")
|
145
|
+
end
|
146
|
+
|
123
147
|
{
|
124
148
|
'nodeta.rss.xml' => {
|
125
149
|
:title => "Nodeta",
|
@@ -129,6 +153,7 @@ describe FeedParser do
|
|
129
153
|
:guid => "http://blog.nodeta.fi/?p=73",
|
130
154
|
:link => "http://blog.nodeta.fi/2009/01/16/ruby-187-imported/",
|
131
155
|
:title => "Ruby 1.8.7 imported",
|
156
|
+
:published => DateTime.parse("Jan 16, 2009 15:29:52 GMT"),
|
132
157
|
:categories => ["APIdock", "Ruby"],
|
133
158
|
:author => "Otto Hilska",
|
134
159
|
:description => "I just finished importing Ruby 1.8.7 to APIdock. It’s also the new default version, because usually it is better documented. However, there’re some incompatibilities between 1.8.6 and 1.8.7, so be sure to check the older documentation when something seems to be wrong.\n",
|
@@ -144,6 +169,7 @@ describe FeedParser do
|
|
144
169
|
:guid => "basecamp.00000000.Comment.1234567",
|
145
170
|
:link => "https://awesome.basecamphq.com/unique_item_link",
|
146
171
|
:title => "Comment posted: Re: Howdy how?",
|
172
|
+
:published => DateTime.parse("Nov 9, 2011 20:35:18 GMT"),
|
147
173
|
:categories => [],
|
148
174
|
:author => "Ffuuuuuuu- Le.",
|
149
175
|
:description => "<div>trololooo</div><p>Company: awesome | Project: Awesome project</p>",
|
@@ -159,6 +185,7 @@ describe FeedParser do
|
|
159
185
|
:guid => "http://scrumalliance.org/articles/424-testing-in-scrum-with-a-waterfall-interaction",
|
160
186
|
:link => "http://scrumalliance.org/articles/424-testing-in-scrum-with-a-waterfall-interaction", # trims the link
|
161
187
|
:title => "Testing in Scrum with a Waterfall Interaction",
|
188
|
+
:published => DateTime.parse("May 23, 2012 11:07:03 GMT"),
|
162
189
|
:categories => [],
|
163
190
|
:author => "",
|
164
191
|
:description => "Sometimes, when testing user stories in Scrum, there's a final Waterfall interaction to deal with. The scenario I present here is based on this situation: a Scrum process with an interaction of sequential phases at the end of the process to (re)test the whole developed functionality. These sequential phases are mandatory for our organization, which follows a Waterfall process for the releases of the product. So, for the moment at least, we have to deal with this and my experience is that we aren't alone.",
|
@@ -196,6 +223,18 @@ describe FeedParser do
|
|
196
223
|
end
|
197
224
|
end
|
198
225
|
|
226
|
+
it "should set the published date if present" do
|
227
|
+
feed = FeedParser::Feed.new(feed_xml('smashingmagazine.atom.xml'))
|
228
|
+
item = feed.items.first
|
229
|
+
item.published.should == DateTime.parse("Jul 20, 2009 8:43:22 GMT")
|
230
|
+
end
|
231
|
+
|
232
|
+
it "should default the published date to the updated date if not present" do
|
233
|
+
feed = FeedParser::Feed.new(feed_xml('facebook.atom.xml'))
|
234
|
+
item = feed.items.first
|
235
|
+
item.published.should == DateTime.parse("Dec 30, 2011 17:00 GMT")
|
236
|
+
end
|
237
|
+
|
199
238
|
{
|
200
239
|
'gcal.atom.xml' => {
|
201
240
|
:title => "dokaus.net",
|
@@ -213,6 +252,7 @@ describe FeedParser do
|
|
213
252
|
:guid => "urn:uuid:132266233552163",
|
214
253
|
:link => "http://developers.facebook.com/blog/post/614/",
|
215
254
|
:title => "Breaking Change: JavaScript SDK to oauth:true on December 13th",
|
255
|
+
:published => DateTime.parse("Dec 12, 2011 17:00 GMT"),
|
216
256
|
:categories=>[],
|
217
257
|
:author => "",
|
218
258
|
:description => "",
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feed_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.4
|
4
|
+
version: 0.3.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-07-26 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement:
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,15 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements:
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
25
30
|
- !ruby/object:Gem::Dependency
|
26
31
|
name: rake
|
27
|
-
requirement:
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
28
33
|
none: false
|
29
34
|
requirements:
|
30
35
|
- - ! '>='
|
@@ -32,10 +37,15 @@ dependencies:
|
|
32
37
|
version: '0.9'
|
33
38
|
type: :development
|
34
39
|
prerelease: false
|
35
|
-
version_requirements:
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0.9'
|
36
46
|
- !ruby/object:Gem::Dependency
|
37
47
|
name: rspec
|
38
|
-
requirement:
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
39
49
|
none: false
|
40
50
|
requirements:
|
41
51
|
- - ! '>='
|
@@ -43,7 +53,12 @@ dependencies:
|
|
43
53
|
version: '2.10'
|
44
54
|
type: :development
|
45
55
|
prerelease: false
|
46
|
-
version_requirements:
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '2.10'
|
47
62
|
description: Rss and Atom feed parser with sanitizer support built on top of Nokogiri.
|
48
63
|
email:
|
49
64
|
- arttu.tervo@gmail.com
|
@@ -93,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
93
108
|
version: '0'
|
94
109
|
requirements: []
|
95
110
|
rubyforge_project:
|
96
|
-
rubygems_version: 1.8.
|
111
|
+
rubygems_version: 1.8.24
|
97
112
|
signing_key:
|
98
113
|
specification_version: 3
|
99
114
|
summary: Rss and Atom feed parser
|