feed_me 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/README +23 -0
- data/Rakefile +59 -0
- data/TODO +2 -0
- data/lib/feed_me.rb +36 -0
- data/lib/feed_me/abstract_parser.rb +119 -0
- data/lib/feed_me/consts.rb +69 -0
- data/lib/feed_me/feed_parser.rb +59 -0
- data/lib/feed_me/feed_struct.rb +13 -0
- data/lib/feed_me/item_parser.rb +25 -0
- data/lib/feed_me/merbtasks.rb +6 -0
- data/lib/feed_me/simple_struct.rb +15 -0
- data/spec/feed_parser_spec.rb +295 -0
- data/spec/item_parser_spec.rb +188 -0
- data/spec/simple_struct_spec.rb +14 -0
- data/spec/spec_helper.rb +25 -0
- metadata +81 -0
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2008 Jonas Nicklas
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
Feed Me
|
2
|
+
=======
|
3
|
+
|
4
|
+
Feed Me is a simple parser for RSS2 and Atom feeds; adding other feed formats should be trivial. Feed Me is pretty minimal and basically only does translation/cleanup from different feed formats to a consistent API. It is designed to be minimal.
|
5
|
+
|
6
|
+
Feed Me is built on the excellent Hpricot parser written by _why the lucky stiff.
|
7
|
+
|
8
|
+
Use it like this:
|
9
|
+
|
10
|
+
file = File.read('some_feed.atom')
|
11
|
+
|
12
|
+
feed = FeedMe.parse(file)
|
13
|
+
|
14
|
+
feed.title
|
15
|
+
feed.author.name
|
16
|
+
feed.entries.each do |entry|
|
17
|
+
entry.title
|
18
|
+
entry.content
|
19
|
+
end
|
20
|
+
|
21
|
+
Check out the specs or lib/feed_me/consts.rb for the complete API.
|
22
|
+
|
23
|
+
DISCLAIMER: This is very much alpha software, use at your own risk!
|
data/Rakefile
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake/gempackagetask'
|
3
|
+
require 'spec/rake/spectask'
|
4
|
+
|
5
|
+
file_list = FileList['spec/*_spec.rb']
|
6
|
+
|
7
|
+
namespace :spec do
|
8
|
+
desc "Run all examples with RCov"
|
9
|
+
Spec::Rake::SpecTask.new('rcov') do |t|
|
10
|
+
t.spec_files = file_list
|
11
|
+
t.rcov = true
|
12
|
+
t.rcov_dir = "doc/coverage"
|
13
|
+
t.rcov_opts = ['--exclude', 'spec']
|
14
|
+
end
|
15
|
+
|
16
|
+
desc "Generate an html report"
|
17
|
+
Spec::Rake::SpecTask.new('report') do |t|
|
18
|
+
t.spec_files = file_list
|
19
|
+
t.rcov_opts = ['--exclude', 'spec']
|
20
|
+
t.spec_opts = ["--format", "html:doc/reports/specs.html"]
|
21
|
+
t.fail_on_error = false
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
|
26
|
+
desc 'Default: run specs.'
|
27
|
+
task :default => 'spec:rcov'
|
28
|
+
|
29
|
+
PLUGIN = "feed_me"
|
30
|
+
NAME = "feed_me"
|
31
|
+
AUTHOR = ["Jonas Nicklas", "Jonathan Stott"]
|
32
|
+
EMAIL = "jonas.nicklas@gmail.com"
|
33
|
+
HOMEPAGE = "http://github.com/jnicklas/feed_me"
|
34
|
+
SUMMARY = "Nice and simple RSS and atom feed parsing built on hpricot"
|
35
|
+
|
36
|
+
spec = Gem::Specification.new do |s|
|
37
|
+
end
|
38
|
+
|
39
|
+
# Register the Jeweler packaging tasks when Jeweler is installed; otherwise
# print an install hint and carry on so the remaining tasks stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |s|
    s.name = NAME
    s.platform = Gem::Platform::RUBY
    s.has_rdoc = true
    s.extra_rdoc_files = ["README", "LICENSE", 'TODO']
    s.summary = SUMMARY
    s.description = s.summary
    s.authors = AUTHOR
    s.email = EMAIL
    s.homepage = HOMEPAGE
    s.require_path = 'lib'
    s.autorequire = PLUGIN
    s.add_dependency('hpricot')
    # The specs live in spec/ (not specs/), so glob that directory.
    s.files = %w(LICENSE README Rakefile TODO) + Dir.glob("{lib,spec}/**/*")
  end
  Jeweler::GemcutterTasks.new
rescue LoadError, StandardError
  # A bare `rescue` only covers StandardError, but a failed `require` raises
  # LoadError (a ScriptError), so it has to be named explicitly.
  puts "Jeweler, or one of its dependencies, is not available. Install it with: sudo gem install namelessjon-jeweler -s http://gems.github.com"
end
|
data/lib/feed_me.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# make sure we're running inside Merb
|
2
|
+
if defined?(Merb::Plugins)
|
3
|
+
dependency 'hpricot'
|
4
|
+
else
|
5
|
+
require 'rubygems'
|
6
|
+
require 'hpricot'
|
7
|
+
end
|
8
|
+
require 'time'
|
9
|
+
|
10
|
+
# Only install our own #try when nothing else (e.g. ActiveSupport) provides one.
unless nil.respond_to? :try
  # the ultimate duck
  class Object
    # Calls +method+ with +args+ (via #send, so private and protected methods
    # are reachable) and returns its result. Returns nil when the receiver
    # has no method of that name.
    #
    # A NoMethodError raised for a *different* method name while the called
    # method is running is re-raised, so bugs inside the callee are not
    # silently turned into nil.
    def try(method, *args)
      send(method, *args)
    rescue NoMethodError => e
      # Swallow only the "no such method" error for the method we were asked
      # to call; anything else originates from inside the callee.
      raise unless e.name.to_s == method.to_s
      nil
    end
  end
end
|
20
|
+
|
21
|
+
module FeedMe
  # Raised by FeedMe::FeedParser.parse when the input matches none of the
  # known feed formats.
  class InvalidFeedFormat < StandardError ; end

  # Parses a feed given as a string of XML and returns a parser object.
  # Delegates to FeedMe::FeedParser.parse.
  def self.parse(feed)
    FeedMe::FeedParser.parse(feed)
  end

  # Opens +file+, reads it and parses its contents.
  # Delegates to FeedMe::FeedParser.open, which performs the read; calling
  # .parse here would treat the file name itself as the feed body.
  def self.open(file)
    FeedMe::FeedParser.open(file)
  end
end
|
32
|
+
|
33
|
+
['consts', 'abstract_parser', 'feed_struct', 'simple_struct',
|
34
|
+
'feed_parser', 'item_parser'].each do |f|
|
35
|
+
require File.join(File.dirname(__FILE__), 'feed_me', f)
|
36
|
+
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
module FeedMe
  # Base class shared by FeedParser and ItemParser.
  #
  # A subclass assigns a table to the class-level +properties+ accessor that
  # maps a format (e.g. :atom) to a hash of { reader_name => property spec }.
  # On construction, one reader method per entry is defined on the instance
  # (see #append_methods and #get_proc_for_property for how specs are read).
  class AbstractParser

    class << self

      # +properties+ is the per-class format table described above.
      # +root_nodes+ is not referenced by any code visible in this file.
      attr_accessor :properties, :root_nodes

      # Instantiates the most specific parser for +format+.
      #
      # Looks up FeedMe::<CamelizedFormat><BareClassName> (for example
      # FeedMe::Rss2FeedParser for :rss2 on FeedParser). If that constant is
      # a class descending from the receiver it is instantiated; otherwise
      # the receiver itself is. +args+ are forwarded to the constructor.
      def build(xml, format, *args)
        # in a world with activesupport this would have been written as
        # format_parser = (format.to_s.camelize + self.to_s).constantize
        camelized_format = format.to_s.split('_').map { |w| w.capitalize }.join('')
        bare_class = to_s.split('::').last

        format_parser = begin
          FeedMe.const_get(camelized_format + bare_class)
        rescue NameError
          nil # no format-specific parser defined
        end

        format_parser = self unless format_parser.is_a?(Class) && format_parser <= self
        format_parser.new(xml, format, *args)
      end

    end

    # +xml+ is the node this parser reads from, +format+ the format key and
    # +properties+ the property specs selected for that format.
    attr_accessor :xml, :format, :properties

    alias_method :root_node, :xml

    def initialize(xml, format)
      self.xml = xml
      self.format = format
      self.properties = self.class.properties[self.format]

      append_methods
    end

    # Returns a hash of every property name mapped to the value of the
    # reader method with that name.
    def to_hash
      hash = {}
      properties.each_key { |name| hash[name] = send(name) }
      hash
    end

    protected

    # Reads the text found at +selector+ and splits it into an e-mail
    # address (the first whitespace-delimited token) and a name (the
    # parenthesised part of the remainder, e.g. "a@b.c (Some Name)").
    # Returns a FeedMe::SimpleStruct with :email, :name and a nil :uri;
    # both fields are nil when nothing is found at +selector+.
    def fetch_rss_person(selector)
      item = fetch(selector)
      email, name = item.split(/\s+/, 2) if item
      name &&= name[/\((.*?)\)/, 1] # strip parentheses
      FeedMe::SimpleStruct.new(:email => email, :name => name, :uri => nil)
    end

    # Defines a singleton reader method for every property the object does
    # not already respond to, so readers written by subclasses win.
    def append_methods
      singleton = (class << self; self; end)
      properties.each do |name, spec|
        next if respond_to?(name)
        # meta programming magic
        singleton.send(:define_method, name, &get_proc_for_property(name, spec))
      end
    end

    # Translates a property spec into the proc used as its reader body:
    #   Array [selector, extractor] - fetch "/selector" using the extractor
    #   Hash                        - wrap the root node in a FeedMe::FeedStruct
    #   :undefined                  - always nil
    #   anything else               - fetch "/<spec>" with the default extractor
    def get_proc_for_property(method, p)
      case p
      when Array
        caching_proc(method) { fetch("/#{p[0]}", root_node, p[1].to_sym) }
      when Hash
        caching_proc(method) { FeedMe::FeedStruct.new(root_node, p) }
      when :undefined
        proc { nil }
      else
        caching_proc(method) { fetch("/#{p}", root_node) }
      end
    end

    # Wraps +block+ in a proc that memoizes its result in the instance
    # variable @<name>. The proc is meant to be used as a method body, so
    # the instance variable lives on the object the method is called on.
    # nil and false results are cached too, so the lookup runs only once.
    def caching_proc(name, &block)
      ivar = "@#{name}"
      proc do
        if instance_variable_defined?(ivar)
          instance_variable_get(ivar)
        else
          instance_variable_set(ivar, block.call)
        end
      end
    end

    # Searches +search_in+ for +selector+.
    #
    # With method == :array, returns the inner HTML of every match.
    # Otherwise passes the first match to extract_<method> and returns the
    # result; returns nil when there is no match or no such extractor.
    def fetch(selector, search_in = xml, method = :inner_html)
      items = search_in.search(selector)
      return items.map { |i| extract_inner_html(i) } if method == :array
      return nil if items.empty?

      # The extractors are protected, hence respond_to?(name, true) + send
      # rather than relying on whichever Object#try happens to be loaded.
      extractor = "extract_#{method}"
      send(extractor, items.first) if respond_to?(extractor, true)
    end

    def extract_inner_html(item)
      item.inner_html
    end

    def extract_href(item)
      item[:href]
    end

    # Parses the element's inner HTML as a time and converts it to UTC.
    def extract_time(item)
      Time.parse(item.inner_html).utc
    end

  end
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module FeedMe

  # Selector for the root node of each supported format. FeedParser.identify
  # tries these against the parsed document to detect the format.
  ROOT_NODES = {
    :atom => "//feed[@xmlns='http://www.w3.org/2005/Atom']",
    :rss2 => "//rss[@version=2.0]/channel"
  }

  # Feed-level properties per format, keyed by reader method name.
  #
  # How AbstractParser#get_proc_for_property reads each value:
  #   :name or "selector"    - inner HTML of the first match for "/<value>"
  #   [selector, extractor]  - first match passed to extract_<extractor>;
  #                            the :array extractor collects every match
  #   { ... }                - root node wrapped in a FeedMe::FeedStruct
  #   :undefined             - the reader always returns nil
  #   :special               - no dedicated handling; the parser subclasses
  #                            define these readers themselves (entries, author)
  FEED_PROPERTIES = {
    :atom => {
      :title => :title,
      :updated_at => [:updated, :time],
      :feed_id => :id,
      :url => ["link[@rel=alternate]", :href],
      :href => ["link[@rel=self]", :href],
      :description => :subtitle,
      :generator => :generator,
      :author => {
        :email => 'author/email',
        :name => 'author/name',
        :uri => 'author/uri'
      },
      :entries => :special
    },
    :rss2 => {
      :title => :title,
      :updated_at => [:lastBuildDate, :time],
      :feed_id => :undefined,
      :url => :link,
      :href => :undefined,
      :description => :description,
      :generator => :generator,
      :author => :special,
      :entries => :special
    }
  }

  # Item/entry-level properties per format; values follow the same
  # conventions as FEED_PROPERTIES.
  ITEM_PROPERTIES = {
    :atom => {
      :title => :title,
      :updated_at => [:updated, :time],
      :item_id => :id,
      :url => ["link[@rel=alternate]", :href],
      :content => :content,
      :author => {
        :email => 'author/email',
        :name => 'author/name',
        :uri => 'author/uri'
      }
    },
    :rss2 => {
      :title => :title,
      :updated_at => [:pubDate, :time],
      :item_id => :guid,
      :url => :link,
      :content => :description,
      :author => :special,
      :categories => [:category, :array]
    }
  }

  # Author properties for Atom. Not referenced by the parser code visible
  # alongside this file.
  AUTHOR_PROPERTIES = {
    :atom => {
      :name => :name,
      :uri => :uri,
      :email => :email
    }
  }

end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module FeedMe

  # Parser for a whole feed. The class methods detect the feed format and
  # hand back an instance of the matching format-specific subclass.
  class FeedParser < AbstractParser

    self.properties = FEED_PROPERTIES

    class << self

      # Opens +file+ with Kernel.open, reads it and parses the contents.
      # The block form is used so the handle is closed once it has been read.
      #
      # NOTE(review): Kernel.open treats names starting with "|" as commands
      # to run; do not pass untrusted input here.
      def open(file)
        parse(Kernel.open(file) { |io| io.read })
      end

      # parses the passed feed and identifies what kind of feed it is
      # then returns a parser object
      #
      # Raises InvalidFeedFormat when no known root node is found.
      def parse(feed)
        xml = Hpricot.XML(feed)

        root_node, format = identify(xml)
        raise InvalidFeedFormat if format.nil?

        build(root_node, format)
      end

      protected

      # Returns [root_node, format] for the first ROOT_NODES selector that
      # matches +xml+, or nil when none does.
      def identify(xml)
        FeedMe::ROOT_NODES.each do |f, s|
          item = xml.at(s)
          return item, f if item
        end
        # Hash#each returns the hash itself; answer nil explicitly so the
        # "unknown format" case does not depend on how a Hash destructures.
        nil
      end

    end
  end

  # Feed parser for Atom documents.
  class AtomFeedParser < FeedParser
    self.properties = FEED_PROPERTIES

    # Builds an item parser for every <entry> element under the root node.
    def entries
      xml.search('entry').map do |el|
        ItemParser.build(el, self.format, self)
      end
    end
  end

  # Feed parser for RSS 2.0 documents.
  class Rss2FeedParser < FeedParser
    self.properties = FEED_PROPERTIES

    # Builds an item parser for every <item> element under the root node.
    def entries
      xml.search('item').map do |el|
        ItemParser.build(el, self.format, self)
      end
    end

    # The feed author, read from the managingEditor element.
    def author
      fetch_rss_person("managingEditor")
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module FeedMe

  # Parser for a single feed item/entry. Besides the node and format it
  # keeps a reference to the feed parser the item was built from.
  class ItemParser < AbstractParser

    self.properties = ITEM_PROPERTIES

    # The feed parser passed to the constructor.
    attr_accessor :feed

    def initialize(xml, format, feed)
      super(xml, format)
      self.feed = feed
    end

  end

  # Item parser for RSS 2.0 items.
  class Rss2ItemParser < ItemParser

    self.properties = ITEM_PROPERTIES

    # The item author, read from the author element.
    def author
      fetch_rss_person("author")
    end

  end
end
|
@@ -0,0 +1,295 @@
|
|
1
|
+
require File.join( File.dirname(__FILE__), "spec_helper" )
|
2
|
+
|
3
|
+
require 'feed_me'
|
4
|
+
|
5
|
+
# Shared examples run against @atom and @rss2, which the including example
# group sets up in its own before block.
describe "all parsing methods", :shared => true do
  it "should identify an atom feed" do
    @atom.should be_an_instance_of(FeedMe::AtomFeedParser)
    @atom.format.should == :atom
    # NOTE(review): the result of this comparison is discarded (there is no
    # `.should`), so this line asserts nothing. Confirm what #xpath returns
    # for the root node before turning it into a real expectation.
    @atom.root_node.xpath == "//feed[@xmlns='http://www.w3.org/2005/Atom']"
  end

  it "should identify an rss2 feed" do
    @rss2.should be_an_instance_of(FeedMe::Rss2FeedParser)
    @rss2.format.should == :rss2
    # NOTE(review): as above, this comparison is not an expectation.
    @rss2.root_node.xpath == "//rss[@version=2.0]/channel"
  end
end
|
18
|
+
|
19
|
+
describe FeedMe::FeedParser do
|
20
|
+
|
21
|
+
before :each do
|
22
|
+
@atom_feed = hpricot_fixture('welformed.atom') / "//feed[@xmlns='http://www.w3.org/2005/Atom']"
|
23
|
+
@atom = FeedMe::FeedParser.build(@atom_feed, :atom)
|
24
|
+
@rss2_feed = hpricot_fixture('welformed.rss2') / "//rss[@version=2.0]/channel"
|
25
|
+
@rss2 = FeedMe::FeedParser.build(@rss2_feed, :rss2)
|
26
|
+
end
|
27
|
+
|
28
|
+
it "should be an atom parser for an atom feed" do
|
29
|
+
@atom.should be_an_instance_of(FeedMe::AtomFeedParser)
|
30
|
+
end
|
31
|
+
|
32
|
+
describe ".parse" do
|
33
|
+
before(:each) do
|
34
|
+
@atom = FeedMe::FeedParser.parse(open(fixture('welformed.atom')).read)
|
35
|
+
@rss2 = FeedMe::FeedParser.parse(open(fixture('welformed.rss2')).read)
|
36
|
+
end
|
37
|
+
|
38
|
+
it_should_behave_like "all parsing methods"
|
39
|
+
|
40
|
+
describe "with bad input" do
|
41
|
+
it "should raise on an empty body" do
|
42
|
+
lambda { FeedMe::FeedParser.parse("") }.should raise_error(FeedMe::InvalidFeedFormat)
|
43
|
+
end
|
44
|
+
|
45
|
+
it "should raise on a body with non-recognised xml" do
|
46
|
+
lambda {
|
47
|
+
FeedMe::FeedParser.parse(%Q|<?xml version="1.0" encoding="UTF-8"?>"<foo>bar</foo>|)
|
48
|
+
}.should raise_error(FeedMe::InvalidFeedFormat)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
53
|
+
|
54
|
+
describe ".open" do
|
55
|
+
before(:each) do
|
56
|
+
@atom = FeedMe::FeedParser.open(fixture('welformed.atom'))
|
57
|
+
@rss2 = FeedMe::FeedParser.open(fixture('welformed.rss2'))
|
58
|
+
end
|
59
|
+
|
60
|
+
it_should_behave_like "all parsing methods"
|
61
|
+
end
|
62
|
+
|
63
|
+
describe '#title' do
|
64
|
+
it "should be valid for an atom feed" do
|
65
|
+
@atom.title.should == "Test feed"
|
66
|
+
end
|
67
|
+
|
68
|
+
it "should be valid for an rss2 feed" do
|
69
|
+
@rss2.title.should == "Lift Off News"
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
describe '#description' do
|
74
|
+
it "should be valid for an atom feed" do
|
75
|
+
@atom.description.should == "Monkey test feed"
|
76
|
+
end
|
77
|
+
|
78
|
+
it "should be valid for an rss2 feed" do
|
79
|
+
@rss2.description.should == "Liftoff to Space Exploration."
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
describe '#feed_id' do
|
84
|
+
it "should be valid for an atom feed" do
|
85
|
+
@atom.feed_id.should == "tag:imaginary.host:nyheter"
|
86
|
+
end
|
87
|
+
|
88
|
+
it "should be nil for an rss2 feed" do
|
89
|
+
@rss2.feed_id.should be_nil
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
describe '#updated_at' do
|
94
|
+
it "should be valid for an atom feed" do
|
95
|
+
@atom.updated_at.should == Time.utc(2008, 3, 7, 20, 41, 10)
|
96
|
+
end
|
97
|
+
|
98
|
+
it "should be valid for an rss2 feed" do
|
99
|
+
@rss2.updated_at.should == Time.utc(2003, 6, 10, 9, 41, 1)
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
describe '#href' do
  it "should be valid for an atom feed" do
    @atom.href.should == "http://imaginary.host/posts.atom"
  end

  # The description used to say "atom feed" although the example checks @rss2.
  it "should be nil for an rss2 feed" do
    @rss2.href.should be_nil
  end
end
|
112
|
+
|
113
|
+
describe '#url' do
|
114
|
+
it "should be valid for an atom feed" do
|
115
|
+
@atom.url.should == "http://imaginary.host/posts"
|
116
|
+
end
|
117
|
+
|
118
|
+
it "should be valid for an rss2 feed" do
|
119
|
+
@rss2.url.should == "http://liftoff.msfc.nasa.gov/"
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
describe '#generator' do
|
124
|
+
it "should be valid for an atom feed" do
|
125
|
+
@atom.generator.should == "Roll your own"
|
126
|
+
end
|
127
|
+
|
128
|
+
it "should be valid for an rss2 feed" do
|
129
|
+
@rss2.generator.should == "Weblog Editor 2.0"
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
describe '#format' do
|
134
|
+
it "should be :atom for an atom feed" do
|
135
|
+
@atom.format.should == :atom
|
136
|
+
end
|
137
|
+
|
138
|
+
it "should be :rss2 for an rss2 feed" do
|
139
|
+
@rss2.format.should == :rss2
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
describe '#author.name' do
|
144
|
+
it "should be valid for an atom feed" do
|
145
|
+
@atom.author.name.should == "Frank"
|
146
|
+
end
|
147
|
+
|
148
|
+
it "should be valid for an rss2 feed" do
|
149
|
+
@rss2.author.name.should == "Mary Jo"
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
describe '#author.email' do
|
154
|
+
it "should be valid for an atom feed" do
|
155
|
+
@atom.author.email.should == "frank@imaginary.host"
|
156
|
+
end
|
157
|
+
|
158
|
+
it "should be valid for an rss2 feed" do
|
159
|
+
@rss2.author.email.should == "editor@example.com"
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
describe '#author.uri' do
|
164
|
+
it "should be valid for an atom feed" do
|
165
|
+
@atom.author.uri.should == "http://imaginary.host/students/frank"
|
166
|
+
end
|
167
|
+
|
168
|
+
it "should be nil for an rss2 feed" do
|
169
|
+
@rss2.author.uri.should be_nil
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
describe '#entries' do
|
174
|
+
it "should return an array of entries for an atom feed" do
|
175
|
+
@atom.entries.should be_an_instance_of(Array)
|
176
|
+
end
|
177
|
+
|
178
|
+
it "should have the correct length for an atom feed" do
|
179
|
+
@atom.should have(3).entries
|
180
|
+
end
|
181
|
+
|
182
|
+
it "should return items that are properly parsed for an atom feed" do
|
183
|
+
@atom.entries.first.title.should == "First title"
|
184
|
+
@atom.entries.first.url.should == "http://imaginary.host/posts/3"
|
185
|
+
end
|
186
|
+
|
187
|
+
it "should return an array of entries for an rss2 feed" do
|
188
|
+
@rss2.entries.should be_an_instance_of(Array)
|
189
|
+
end
|
190
|
+
|
191
|
+
it "should have the correct length for an rss2 feed" do
|
192
|
+
@rss2.should have(4).entries
|
193
|
+
end
|
194
|
+
|
195
|
+
it "should return items that are properly parsed for an rss2 feed" do
|
196
|
+
@rss2.entries.first.title.should == "Star City"
|
197
|
+
@rss2.entries.first.url.should == "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
|
198
|
+
@rss2.entries.first.item_id.should == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
|
199
|
+
end
|
200
|
+
|
201
|
+
it "should allow items to be read more than once" do
|
202
|
+
item = @rss2.entries.first
|
203
|
+
item.item_id.should == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
|
204
|
+
item.item_id.should == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
describe '#to_hash' do
|
209
|
+
it "should serialize the title of an atom feed" do
|
210
|
+
@atom.to_hash[:title].should == "Test feed"
|
211
|
+
end
|
212
|
+
|
213
|
+
it "should serialize the description of an atom feed" do
|
214
|
+
@atom.to_hash[:description].should == "Monkey test feed"
|
215
|
+
end
|
216
|
+
|
217
|
+
it "should serialize the feed_id of an atom feed" do
|
218
|
+
@atom.to_hash[:feed_id].should == "tag:imaginary.host:nyheter"
|
219
|
+
end
|
220
|
+
|
221
|
+
it "should serialize the updated_at time of an atom feed" do
|
222
|
+
@atom.to_hash[:updated_at].should == Time.utc(2008, 3, 7, 20, 41, 10)
|
223
|
+
end
|
224
|
+
|
225
|
+
it "should serialize the href of an atom feed" do
|
226
|
+
@atom.to_hash[:href].should == "http://imaginary.host/posts.atom"
|
227
|
+
end
|
228
|
+
|
229
|
+
it "should serialize the url of an atom feed" do
|
230
|
+
@atom.to_hash[:url].should == "http://imaginary.host/posts"
|
231
|
+
end
|
232
|
+
|
233
|
+
it "should serialize the generator of an atom feed" do
|
234
|
+
@atom.to_hash[:generator].should == "Roll your own"
|
235
|
+
end
|
236
|
+
|
237
|
+
it "should serialize the entries of an atom feed" do
|
238
|
+
@atom.to_hash[:entries].should be_an_instance_of(Array)
|
239
|
+
@atom.to_hash[:entries].first.title.should == "First title"
|
240
|
+
@atom.to_hash[:entries].first.url.should == "http://imaginary.host/posts/3"
|
241
|
+
end
|
242
|
+
|
243
|
+
it "should serialize the author of an atom feed" do
|
244
|
+
author = @atom.to_hash[:author]
|
245
|
+
|
246
|
+
author.name.should == "Frank"
|
247
|
+
author.email.should == "frank@imaginary.host"
|
248
|
+
author.uri.should == "http://imaginary.host/students/frank"
|
249
|
+
end
|
250
|
+
|
251
|
+
it "should serialize the title of an rss2 feed" do
|
252
|
+
@rss2.to_hash[:title].should == "Lift Off News"
|
253
|
+
end
|
254
|
+
|
255
|
+
it "should serialize the description of an rss2 feed" do
|
256
|
+
@rss2.to_hash[:description].should == "Liftoff to Space Exploration."
|
257
|
+
end
|
258
|
+
|
259
|
+
it "should serialize the feed_id of an rss2 feed" do
|
260
|
+
@rss2.to_hash[:feed_id].should be_nil
|
261
|
+
end
|
262
|
+
|
263
|
+
it "should serialize the updated_at time of an rss2 feed" do
|
264
|
+
@rss2.to_hash[:updated_at].should == Time.utc(2003, 6, 10, 9, 41, 1)
|
265
|
+
end
|
266
|
+
|
267
|
+
it "should serialize the href of an rss2 feed" do
|
268
|
+
@rss2.to_hash[:href].should be_nil
|
269
|
+
end
|
270
|
+
|
271
|
+
it "should serialize the url of an rss2 feed" do
|
272
|
+
@rss2.to_hash[:url].should == "http://liftoff.msfc.nasa.gov/"
|
273
|
+
end
|
274
|
+
|
275
|
+
it "should serialize the generator of an rss2 feed" do
|
276
|
+
@rss2.to_hash[:generator].should == "Weblog Editor 2.0"
|
277
|
+
end
|
278
|
+
|
279
|
+
it "should serialize the entries of an rss2 feed" do
|
280
|
+
@rss2.to_hash[:entries].should be_an_instance_of(Array)
|
281
|
+
@rss2.to_hash[:entries].first.title.should == "Star City"
|
282
|
+
@rss2.to_hash[:entries].first.url.should == "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
|
283
|
+
end
|
284
|
+
|
285
|
+
it "should serialize the author of an rss2 feed" do
|
286
|
+
|
287
|
+
author = @rss2.to_hash[:author]
|
288
|
+
|
289
|
+
author.name.should == "Mary Jo"
|
290
|
+
author.email.should == "editor@example.com"
|
291
|
+
author.uri.should be_nil
|
292
|
+
end
|
293
|
+
end
|
294
|
+
|
295
|
+
end
|
@@ -0,0 +1,188 @@
|
|
1
|
+
require File.join( File.dirname(__FILE__), "spec_helper" )
|
2
|
+
|
3
|
+
require 'feed_me'
|
4
|
+
|
5
|
+
describe FeedMe::ItemParser do
|
6
|
+
|
7
|
+
before :each do
|
8
|
+
@atom_feed = FeedMe::FeedParser.open(fixture('welformed.atom'))
|
9
|
+
@atom = FeedMe::ItemParser.build(@atom_feed.root_node.search('/entry').first, :atom, @atom_feed)
|
10
|
+
@rss2_feed = FeedMe::FeedParser.open(fixture('welformed.rss2'))
|
11
|
+
@rss2 = FeedMe::ItemParser.build(@rss2_feed.root_node.search('/item').first, :rss2, @rss2_feed)
|
12
|
+
end
|
13
|
+
|
14
|
+
describe '#to_hash' do
|
15
|
+
it "should serialize the parsed properties to a hash" do
|
16
|
+
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
describe '#title' do
|
21
|
+
it "should be valid for an atom feed" do
|
22
|
+
@atom.title.should == "First title"
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should be valid for an rss2 feed" do
|
26
|
+
@rss2.title.should == "Star City"
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
describe '#content' do
|
31
|
+
it "should be valid for an atom feed" do
|
32
|
+
@atom.content.should == "Here be content"
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should be valid for an rss2 feed" do
|
36
|
+
@rss2.content.should == "This is content"
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
describe '#item_id' do
|
41
|
+
it "should be valid for an atom feed" do
|
42
|
+
@atom.item_id.should == "tag:imaginary.host,2008-03-07:nyheter/3"
|
43
|
+
end
|
44
|
+
|
45
|
+
it "should be valid for an rss2 feed" do
|
46
|
+
@rss2.item_id.should == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
describe '#updated_at' do
|
51
|
+
it "should be valid for an atom feed" do
|
52
|
+
@atom.updated_at.should == Time.utc( 2008, 3, 7, 20, 41, 10 )
|
53
|
+
end
|
54
|
+
|
55
|
+
it "should be valid for an rss2 feed" do
|
56
|
+
@rss2.updated_at.should == Time.utc(2003, 6, 3, 9, 39, 21)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
describe '#url' do
|
61
|
+
it "should be valid for an atom feed" do
|
62
|
+
@atom.url.should == "http://imaginary.host/posts/3"
|
63
|
+
end
|
64
|
+
|
65
|
+
it "should be valid for an rss2 feed" do
|
66
|
+
@rss2.url.should == "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
describe '#format' do
|
71
|
+
it "should be :atom for an atom feed" do
|
72
|
+
@atom.format.should == :atom
|
73
|
+
end
|
74
|
+
|
75
|
+
it "should be :rss2 for an rss2 feed" do
|
76
|
+
@rss2.format.should == :rss2
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
describe '#categories' do
|
81
|
+
it "should be correct for an rss2 feed" do
|
82
|
+
@rss2.categories.should == ['news', 'chuck']
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
describe '#author.name' do
|
87
|
+
it "should be valid for an atom feed" do
|
88
|
+
@atom.author.name.should == "Jonas Nicklas"
|
89
|
+
end
|
90
|
+
|
91
|
+
it "should be valid for an rss2 feed" do
|
92
|
+
@rss2.author.name.should == "Chuck Norris"
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
describe '#author.email' do
|
97
|
+
it "should be valid for an atom feed" do
|
98
|
+
@atom.author.email.should == "jonas.nicklas@imaginary.host"
|
99
|
+
end
|
100
|
+
|
101
|
+
it "should be valid for an rss2 feed" do
|
102
|
+
@rss2.author.email.should == "da_man@example.com"
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
describe '#author.uri' do
|
107
|
+
it "should be valid for an atom feed" do
|
108
|
+
@atom.author.uri.should == "http://imaginary.host/students/jnicklas"
|
109
|
+
end
|
110
|
+
|
111
|
+
it "should be nil for an rss2 feed" do
|
112
|
+
@rss2.author.uri.should be_nil
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
describe '#to_hash' do
|
117
|
+
|
118
|
+
it "should serialize the title for an atom feed" do
|
119
|
+
@atom.to_hash[:title].should == "First title"
|
120
|
+
end
|
121
|
+
|
122
|
+
it "should serialize the item_id for an atom feed" do
|
123
|
+
@atom.to_hash[:item_id].should == "tag:imaginary.host,2008-03-07:nyheter/3"
|
124
|
+
end
|
125
|
+
|
126
|
+
it "should serialize updated_at for an atom feed" do
|
127
|
+
@atom.to_hash[:updated_at].should == Time.utc( 2008, 3, 7, 20, 41, 10 )
|
128
|
+
end
|
129
|
+
|
130
|
+
it "should serialize the url for an atom feed" do
|
131
|
+
@atom.to_hash[:url].should == "http://imaginary.host/posts/3"
|
132
|
+
end
|
133
|
+
|
134
|
+
it "should serialize the author of an atom feed" do
|
135
|
+
author = @atom.to_hash[:author]
|
136
|
+
|
137
|
+
author.name.should == "Jonas Nicklas"
|
138
|
+
author.email.should == "jonas.nicklas@imaginary.host"
|
139
|
+
author.uri.should == "http://imaginary.host/students/jnicklas"
|
140
|
+
end
|
141
|
+
|
142
|
+
it "should serialize the title for an rss2 feed" do
|
143
|
+
@rss2.to_hash[:title].should == "Star City"
|
144
|
+
end
|
145
|
+
|
146
|
+
it "should serialize the item_id for an rss2 feed" do
|
147
|
+
@rss2.to_hash[:item_id].should == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
|
148
|
+
end
|
149
|
+
|
150
|
+
it "should serialize updated_at for an rss2 feed" do
|
151
|
+
@rss2.to_hash[:updated_at].should == Time.utc(2003, 6, 3, 9, 39, 21)
|
152
|
+
end
|
153
|
+
|
154
|
+
it "should serialize the url for an rss2 feed" do
|
155
|
+
@rss2.to_hash[:url].should == "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
|
156
|
+
end
|
157
|
+
|
158
|
+
it "should serialize the author of an rss2 feed" do
|
159
|
+
author = @rss2.to_hash[:author]
|
160
|
+
|
161
|
+
author.name.should == "Chuck Norris"
|
162
|
+
author.email.should == "da_man@example.com"
|
163
|
+
author.uri.should be_nil
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
end
|
168
|
+
|
169
|
+
describe "Without an author", FeedMe::ItemParser do
|
170
|
+
|
171
|
+
before :each do
|
172
|
+
@atom_feed = FeedMe::FeedParser.open(fixture('welformed.atom'))
|
173
|
+
@atom = FeedMe::ItemParser.build(@atom_feed.root_node.search('/entry')[1], :atom, @atom_feed)
|
174
|
+
@rss2_feed = FeedMe::FeedParser.open(fixture('welformed.rss2'))
|
175
|
+
@rss2 = FeedMe::ItemParser.build(@rss2_feed.root_node.search('/item')[1], :rss2, @rss2_feed)
|
176
|
+
end
|
177
|
+
|
178
|
+
describe '#author.name' do
|
179
|
+
it "should be valid for an atom feed" do
|
180
|
+
@atom.author.name.should be_nil
|
181
|
+
end
|
182
|
+
|
183
|
+
it "should be valid for an rss2 feed" do
|
184
|
+
@rss2.author.name.should be_nil
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require File.join( File.dirname(__FILE__), "spec_helper" )
|
2
|
+
|
3
|
+
require 'feed_me'
|
4
|
+
|
5
|
+
describe FeedMe::SimpleStruct do
|
6
|
+
|
7
|
+
it "should append methods" do
|
8
|
+
struct = FeedMe::SimpleStruct.new(:foo => "blah", :bar => 23)
|
9
|
+
|
10
|
+
struct.foo.should == "blah"
|
11
|
+
struct.bar.should == 23
|
12
|
+
end
|
13
|
+
|
14
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
$TESTING=true
|
2
|
+
$:.push File.join(File.dirname(__FILE__), '..', 'lib')
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
# require 'ruby-debug'
|
6
|
+
|
7
|
+
# Helpers for locating and loading the files under spec/fixtures.
module Fixtures

  # Path of the fixtures directory next to this file.
  def fixture_path
    File.join(File.dirname(__FILE__), 'fixtures')
  end

  # Path of the fixture file called +name+.
  def fixture(name)
    File.join(fixture_path, name)
  end

  # Parses the fixture called +name+ with Hpricot.XML.
  # File.read is used so no file handle is left open afterwards.
  def hpricot_fixture(name)
    Hpricot.XML(File.read(fixture(name)))
  end

end
|
22
|
+
|
23
|
+
Spec::Runner.configure do |config|
|
24
|
+
config.include(Fixtures)
|
25
|
+
end
|
metadata
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: feed_me
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.5
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jonas Nicklas
|
8
|
+
- Jonathan Stott
|
9
|
+
autorequire: feed_me
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2009-11-06 00:00:00 +00:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: hpricot
|
18
|
+
type: :runtime
|
19
|
+
version_requirement:
|
20
|
+
version_requirements: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - ">="
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: "0"
|
25
|
+
version:
|
26
|
+
description: Nice and simple RSS and atom feed parsing built on hpricot
|
27
|
+
email: jonas.nicklas@gmail.com
|
28
|
+
executables: []
|
29
|
+
|
30
|
+
extensions: []
|
31
|
+
|
32
|
+
extra_rdoc_files:
|
33
|
+
- LICENSE
|
34
|
+
- README
|
35
|
+
- TODO
|
36
|
+
files:
|
37
|
+
- LICENSE
|
38
|
+
- README
|
39
|
+
- Rakefile
|
40
|
+
- TODO
|
41
|
+
- lib/feed_me.rb
|
42
|
+
- lib/feed_me/abstract_parser.rb
|
43
|
+
- lib/feed_me/consts.rb
|
44
|
+
- lib/feed_me/feed_parser.rb
|
45
|
+
- lib/feed_me/feed_struct.rb
|
46
|
+
- lib/feed_me/item_parser.rb
|
47
|
+
- lib/feed_me/merbtasks.rb
|
48
|
+
- lib/feed_me/simple_struct.rb
|
49
|
+
has_rdoc: true
|
50
|
+
homepage: http://github.com/jnicklas/feed_me
|
51
|
+
licenses: []
|
52
|
+
|
53
|
+
post_install_message:
|
54
|
+
rdoc_options:
|
55
|
+
- --charset=UTF-8
|
56
|
+
require_paths:
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: "0"
|
63
|
+
version:
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: "0"
|
69
|
+
version:
|
70
|
+
requirements: []
|
71
|
+
|
72
|
+
rubyforge_project:
|
73
|
+
rubygems_version: 1.3.5
|
74
|
+
signing_key:
|
75
|
+
specification_version: 3
|
76
|
+
summary: Nice and simple RSS and atom feed parsing built on hpricot
|
77
|
+
test_files:
|
78
|
+
- spec/feed_parser_spec.rb
|
79
|
+
- spec/item_parser_spec.rb
|
80
|
+
- spec/simple_struct_spec.rb
|
81
|
+
- spec/spec_helper.rb
|