feed_me 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/README +23 -0
- data/Rakefile +59 -0
- data/TODO +2 -0
- data/lib/feed_me.rb +36 -0
- data/lib/feed_me/abstract_parser.rb +119 -0
- data/lib/feed_me/consts.rb +69 -0
- data/lib/feed_me/feed_parser.rb +59 -0
- data/lib/feed_me/feed_struct.rb +13 -0
- data/lib/feed_me/item_parser.rb +25 -0
- data/lib/feed_me/merbtasks.rb +6 -0
- data/lib/feed_me/simple_struct.rb +15 -0
- data/spec/feed_parser_spec.rb +295 -0
- data/spec/item_parser_spec.rb +188 -0
- data/spec/simple_struct_spec.rb +14 -0
- data/spec/spec_helper.rb +25 -0
- metadata +81 -0
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2008 Jonas Nicklas
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
Feed Me
|
2
|
+
=======
|
3
|
+
|
4
|
+
Feed Me is a simple parser for RSS2 and Atom feeds; adding other feed formats should be trivial. Feed Me is pretty minimal and basically only does translation/cleanup from different feed formats to a consistent API. It is designed to be minimal.
|
5
|
+
|
6
|
+
Feed Me is built on the excellent Hpricot parser written by _why the lucky stiff.
|
7
|
+
|
8
|
+
Use it like this:
|
9
|
+
|
10
|
+
file = File.read('some_feed.atom')
|
11
|
+
|
12
|
+
feed = FeedMe.parse(file)
|
13
|
+
|
14
|
+
feed.title
|
15
|
+
feed.author.name
|
16
|
+
feed.entries.each do |entry|
|
17
|
+
entry.title
|
18
|
+
entry.content
|
19
|
+
end
|
20
|
+
|
21
|
+
Check out the specs or lib/feed_me/consts.rb for the complete API.
|
22
|
+
|
23
|
+
DISCLAIMER: This is very much alpha software, use at your own risk!
|
data/Rakefile
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake/gempackagetask'
|
3
|
+
require 'spec/rake/spectask'
|
4
|
+
|
5
|
+
file_list = FileList['spec/*_spec.rb']
|
6
|
+
|
7
|
+
namespace :spec do
|
8
|
+
desc "Run all examples with RCov"
|
9
|
+
Spec::Rake::SpecTask.new('rcov') do |t|
|
10
|
+
t.spec_files = file_list
|
11
|
+
t.rcov = true
|
12
|
+
t.rcov_dir = "doc/coverage"
|
13
|
+
t.rcov_opts = ['--exclude', 'spec']
|
14
|
+
end
|
15
|
+
|
16
|
+
desc "Generate an html report"
|
17
|
+
Spec::Rake::SpecTask.new('report') do |t|
|
18
|
+
t.spec_files = file_list
|
19
|
+
t.rcov_opts = ['--exclude', 'spec']
|
20
|
+
t.spec_opts = ["--format", "html:doc/reports/specs.html"]
|
21
|
+
t.fail_on_error = false
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
|
26
|
+
desc 'Default: run specs.'
|
27
|
+
task :default => 'spec:rcov'
|
28
|
+
|
29
|
+
PLUGIN = "feed_me"
|
30
|
+
NAME = "feed_me"
|
31
|
+
AUTHOR = ["Jonas Nicklas", "Jonathan Stott"]
|
32
|
+
EMAIL = "jonas.nicklas@gmail.com"
|
33
|
+
HOMEPAGE = "http://github.com/jnicklas/feed_me"
|
34
|
+
SUMMARY = "Nice and simple RSS and atom feed parsing built on hpricot"
|
35
|
+
|
36
|
+
spec = Gem::Specification.new do |s|
|
37
|
+
end
|
38
|
+
|
39
|
+
begin
|
40
|
+
require 'jeweler'
|
41
|
+
Jeweler::Tasks.new do |s|
|
42
|
+
s.name = NAME
|
43
|
+
s.platform = Gem::Platform::RUBY
|
44
|
+
s.has_rdoc = true
|
45
|
+
s.extra_rdoc_files = ["README", "LICENSE", 'TODO']
|
46
|
+
s.summary = SUMMARY
|
47
|
+
s.description = s.summary
|
48
|
+
s.authors = AUTHOR
|
49
|
+
s.email = EMAIL
|
50
|
+
s.homepage = HOMEPAGE
|
51
|
+
s.require_path = 'lib'
|
52
|
+
s.autorequire = PLUGIN
|
53
|
+
s.add_dependency('hpricot')
|
54
|
+
s.files = %w(LICENSE README Rakefile TODO) + Dir.glob("{lib,specs}/**/*")
|
55
|
+
end
|
56
|
+
Jeweler::GemcutterTasks.new
|
57
|
+
rescue
|
58
|
+
puts "Jeweler, or one of its dependencies, is not available. Install it with: sudo gem install namelessjon-jeweler -s http://gems.github.com"
|
59
|
+
end
|
data/lib/feed_me.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# check whether we're running inside Merb; otherwise load hpricot directly
|
2
|
+
if defined?(Merb::Plugins)
|
3
|
+
dependency 'hpricot'
|
4
|
+
else
|
5
|
+
require 'rubygems'
|
6
|
+
require 'hpricot'
|
7
|
+
end
|
8
|
+
require 'time'
|
9
|
+
|
10
|
+
unless nil.respond_to? :try
|
11
|
+
# the ultimate duck
|
12
|
+
class Object
|
13
|
+
def try(method, *args)
|
14
|
+
self.send(method, *args)
|
15
|
+
rescue NoMethodError
|
16
|
+
nil
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
module FeedMe
|
22
|
+
class InvalidFeedFormat < StandardError ; end
|
23
|
+
|
24
|
+
def self.parse(feed)
|
25
|
+
FeedMe::FeedParser.parse(feed)
|
26
|
+
end
|
27
|
+
|
28
|
+
def self.open(file)
|
29
|
+
FeedMe::FeedParser.parse(file)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
['consts', 'abstract_parser', 'feed_struct', 'simple_struct',
|
34
|
+
'feed_parser', 'item_parser'].each do |f|
|
35
|
+
require File.join(File.dirname(__FILE__), 'feed_me', f)
|
36
|
+
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
class FeedMe::AbstractParser
|
2
|
+
|
3
|
+
class << self
|
4
|
+
|
5
|
+
attr_accessor :properties, :root_nodes
|
6
|
+
|
7
|
+
def build(xml, format, *args)
|
8
|
+
# in a world with activesupport this would have been written as
|
9
|
+
# format_parser = (format.to_s.camelize + self.to_s).constantize
|
10
|
+
camelized_format = format.to_s.split('_').map{ |w| w.capitalize }.join('')
|
11
|
+
bare_class = self.to_s.split('::').last
|
12
|
+
|
13
|
+
begin
|
14
|
+
format_parser = FeedMe.const_get(camelized_format + bare_class)
|
15
|
+
rescue NameError
|
16
|
+
end
|
17
|
+
|
18
|
+
if format_parser.is_a?(Class) and format_parser.ancestors.include?(self)
|
19
|
+
return format_parser.new(xml, format, *args)
|
20
|
+
else
|
21
|
+
return self.new(xml, format, *args)
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
def initialize(xml, format)
|
29
|
+
self.xml = xml
|
30
|
+
self.format = format
|
31
|
+
self.properties = self.class.properties[self.format]
|
32
|
+
|
33
|
+
append_methods
|
34
|
+
end
|
35
|
+
|
36
|
+
def to_hash
|
37
|
+
hash = {}
|
38
|
+
self.properties.each do |method, p|
|
39
|
+
hash[method] = self.send(method)
|
40
|
+
end
|
41
|
+
return hash
|
42
|
+
end
|
43
|
+
|
44
|
+
attr_accessor :xml, :format, :properties
|
45
|
+
|
46
|
+
alias_method :root_node, :xml
|
47
|
+
|
48
|
+
protected
|
49
|
+
|
50
|
+
def fetch_rss_person(selector)
|
51
|
+
item = fetch(selector)
|
52
|
+
if(item)
|
53
|
+
email, name = item.split(/\s+/, 2)
|
54
|
+
name = name.match( /\((.*?)\)/ ).to_a[1] if name # strip parentheses
|
55
|
+
else
|
56
|
+
name, email = nil
|
57
|
+
end
|
58
|
+
FeedMe::SimpleStruct.new(:email => email, :name => name, :uri => nil)
|
59
|
+
end
|
60
|
+
|
61
|
+
def append_methods
|
62
|
+
self.properties.each do |method, p|
|
63
|
+
unless respond_to?(method)
|
64
|
+
block = get_proc_for_property(method, p)
|
65
|
+
# metaprogramming magic: define the method on this instance's singleton class
|
66
|
+
(class << self; self; end).module_eval do
|
67
|
+
define_method method, &block
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
def get_proc_for_property(method, p)
|
74
|
+
if p.class == Array
|
75
|
+
return caching_proc(method, &proc { fetch("/#{p[0]}", root_node, p[1].to_sym) })
|
76
|
+
elsif p.class == Hash
|
77
|
+
return caching_proc(method, &proc { FeedMe::FeedStruct.new(root_node, p) })
|
78
|
+
elsif p != :undefined
|
79
|
+
return caching_proc(method, &proc { fetch("/#{p}", root_node) })
|
80
|
+
else
|
81
|
+
return proc { nil }
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
def caching_proc(name, &block)
|
86
|
+
proc do
|
87
|
+
ivar = instance_variable_get("@#{name}")
|
88
|
+
unless ivar
|
89
|
+
result = yield
|
90
|
+
instance_variable_set("@#{name}", result)
|
91
|
+
return result
|
92
|
+
end
|
93
|
+
ivar
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
def fetch(selector, search_in = xml, method = :inner_html)
|
98
|
+
item = search_in.search(selector)
|
99
|
+
|
100
|
+
unless method == :array
|
101
|
+
self.try("extract_" + method.to_s, item.first) unless item.empty?
|
102
|
+
else
|
103
|
+
item.map { |i| self.try("extract_inner_html", i) }
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
def extract_inner_html(item)
|
108
|
+
item.inner_html
|
109
|
+
end
|
110
|
+
|
111
|
+
def extract_href(item)
|
112
|
+
item[:href]
|
113
|
+
end
|
114
|
+
|
115
|
+
def extract_time(item)
|
116
|
+
Time.parse(item.inner_html).utc
|
117
|
+
end
|
118
|
+
|
119
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module FeedMe
|
2
|
+
|
3
|
+
ROOT_NODES = {
|
4
|
+
:atom => "//feed[@xmlns='http://www.w3.org/2005/Atom']",
|
5
|
+
:rss2 => "//rss[@version=2.0]/channel"
|
6
|
+
}
|
7
|
+
|
8
|
+
FEED_PROPERTIES = {
|
9
|
+
:atom => {
|
10
|
+
:title => :title,
|
11
|
+
:updated_at => [:updated, :time],
|
12
|
+
:feed_id => :id,
|
13
|
+
:url => ["link[@rel=alternate]", :href],
|
14
|
+
:href => ["link[@rel=self]", :href],
|
15
|
+
:description => :subtitle,
|
16
|
+
:generator => :generator,
|
17
|
+
:author => {
|
18
|
+
:email => 'author/email',
|
19
|
+
:name => 'author/name',
|
20
|
+
:uri => 'author/uri'
|
21
|
+
},
|
22
|
+
:entries => :special
|
23
|
+
},
|
24
|
+
:rss2 => {
|
25
|
+
:title => :title,
|
26
|
+
:updated_at => [:lastBuildDate, :time],
|
27
|
+
:feed_id => :undefined,
|
28
|
+
:url => :link,
|
29
|
+
:href => :undefined,
|
30
|
+
:description => :description,
|
31
|
+
:generator => :generator,
|
32
|
+
:author => :special,
|
33
|
+
:entries => :special
|
34
|
+
}
|
35
|
+
}
|
36
|
+
|
37
|
+
ITEM_PROPERTIES = {
|
38
|
+
:atom => {
|
39
|
+
:title => :title,
|
40
|
+
:updated_at => [:updated, :time],
|
41
|
+
:item_id => :id,
|
42
|
+
:url => ["link[@rel=alternate]", :href],
|
43
|
+
:content => :content,
|
44
|
+
:author => {
|
45
|
+
:email => 'author/email',
|
46
|
+
:name => 'author/name',
|
47
|
+
:uri => 'author/uri'
|
48
|
+
}
|
49
|
+
},
|
50
|
+
:rss2 => {
|
51
|
+
:title => :title,
|
52
|
+
:updated_at => [:pubDate, :time],
|
53
|
+
:item_id => :guid,
|
54
|
+
:url => :link,
|
55
|
+
:content => :description,
|
56
|
+
:author => :special,
|
57
|
+
:categories => [:category, :array]
|
58
|
+
}
|
59
|
+
}
|
60
|
+
|
61
|
+
AUTHOR_PROPERTIES = {
|
62
|
+
:atom => {
|
63
|
+
:name => :name,
|
64
|
+
:uri => :uri,
|
65
|
+
:email => :email
|
66
|
+
}
|
67
|
+
}
|
68
|
+
|
69
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module FeedMe
|
2
|
+
|
3
|
+
class FeedParser < AbstractParser
|
4
|
+
|
5
|
+
self.properties = FEED_PROPERTIES
|
6
|
+
|
7
|
+
class << self
|
8
|
+
|
9
|
+
def open(file)
|
10
|
+
self.parse(Kernel.open(file).read)
|
11
|
+
end
|
12
|
+
|
13
|
+
# parses the passed feed and identifies what kind of feed it is
|
14
|
+
# then returns a parser object
|
15
|
+
def parse(feed)
|
16
|
+
xml = Hpricot.XML(feed)
|
17
|
+
|
18
|
+
root_node, format = self.identify(xml)
|
19
|
+
raise InvalidFeedFormat if format.nil?
|
20
|
+
|
21
|
+
self.build(root_node, format)
|
22
|
+
end
|
23
|
+
|
24
|
+
protected
|
25
|
+
|
26
|
+
def identify(xml)
|
27
|
+
FeedMe::ROOT_NODES.each do |f, s|
|
28
|
+
item = xml.at(s)
|
29
|
+
return item, f if item
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
class AtomFeedParser < FeedParser
|
37
|
+
self.properties = FEED_PROPERTIES
|
38
|
+
|
39
|
+
def entries
|
40
|
+
xml.search('entry').map do |el|
|
41
|
+
ItemParser.build(el, self.format, self)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
class Rss2FeedParser < FeedParser
|
47
|
+
self.properties = FEED_PROPERTIES
|
48
|
+
|
49
|
+
def entries
|
50
|
+
xml.search('item').map do |el|
|
51
|
+
ItemParser.build(el, self.format, self)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
def author
|
56
|
+
fetch_rss_person("managingEditor")
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module FeedMe
|
2
|
+
|
3
|
+
class ItemParser < AbstractParser
|
4
|
+
|
5
|
+
self.properties = ITEM_PROPERTIES
|
6
|
+
|
7
|
+
attr_accessor :feed
|
8
|
+
|
9
|
+
def initialize(xml, format, feed)
|
10
|
+
super(xml, format)
|
11
|
+
self.feed = feed
|
12
|
+
end
|
13
|
+
|
14
|
+
end
|
15
|
+
|
16
|
+
class Rss2ItemParser < ItemParser
|
17
|
+
|
18
|
+
self.properties = ITEM_PROPERTIES
|
19
|
+
|
20
|
+
def author
|
21
|
+
fetch_rss_person("author")
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,295 @@
|
|
1
|
+
require File.join( File.dirname(__FILE__), "spec_helper" )
|
2
|
+
|
3
|
+
require 'feed_me'
|
4
|
+
|
5
|
+
describe "all parsing methods", :shared => true do
|
6
|
+
it "should identify an atom feed" do
|
7
|
+
@atom.should be_an_instance_of(FeedMe::AtomFeedParser)
|
8
|
+
@atom.format.should == :atom
|
9
|
+
@atom.root_node.xpath == "//feed[@xmlns='http://www.w3.org/2005/Atom']"
|
10
|
+
end
|
11
|
+
|
12
|
+
it "should identify an rss2 feed" do
|
13
|
+
@rss2.should be_an_instance_of(FeedMe::Rss2FeedParser)
|
14
|
+
@rss2.format.should == :rss2
|
15
|
+
@rss2.root_node.xpath == "//rss[@version=2.0]/channel"
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
describe FeedMe::FeedParser do
|
20
|
+
|
21
|
+
before :each do
|
22
|
+
@atom_feed = hpricot_fixture('welformed.atom') / "//feed[@xmlns='http://www.w3.org/2005/Atom']"
|
23
|
+
@atom = FeedMe::FeedParser.build(@atom_feed, :atom)
|
24
|
+
@rss2_feed = hpricot_fixture('welformed.rss2') / "//rss[@version=2.0]/channel"
|
25
|
+
@rss2 = FeedMe::FeedParser.build(@rss2_feed, :rss2)
|
26
|
+
end
|
27
|
+
|
28
|
+
it "should be an atom parser for an atom feed" do
|
29
|
+
@atom.should be_an_instance_of(FeedMe::AtomFeedParser)
|
30
|
+
end
|
31
|
+
|
32
|
+
describe ".parse" do
|
33
|
+
before(:each) do
|
34
|
+
@atom = FeedMe::FeedParser.parse(open(fixture('welformed.atom')).read)
|
35
|
+
@rss2 = FeedMe::FeedParser.parse(open(fixture('welformed.rss2')).read)
|
36
|
+
end
|
37
|
+
|
38
|
+
it_should_behave_like "all parsing methods"
|
39
|
+
|
40
|
+
describe "with bad input" do
|
41
|
+
it "should raise on an empty body" do
|
42
|
+
lambda { FeedMe::FeedParser.parse("") }.should raise_error(FeedMe::InvalidFeedFormat)
|
43
|
+
end
|
44
|
+
|
45
|
+
it "should raise on a body with non-recognised xml" do
|
46
|
+
lambda {
|
47
|
+
FeedMe::FeedParser.parse(%Q|<?xml version="1.0" encoding="UTF-8"?>"<foo>bar</foo>|)
|
48
|
+
}.should raise_error(FeedMe::InvalidFeedFormat)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
53
|
+
|
54
|
+
describe ".open" do
|
55
|
+
before(:each) do
|
56
|
+
@atom = FeedMe::FeedParser.open(fixture('welformed.atom'))
|
57
|
+
@rss2 = FeedMe::FeedParser.open(fixture('welformed.rss2'))
|
58
|
+
end
|
59
|
+
|
60
|
+
it_should_behave_like "all parsing methods"
|
61
|
+
end
|
62
|
+
|
63
|
+
describe '#title' do
|
64
|
+
it "should be valid for an atom feed" do
|
65
|
+
@atom.title.should == "Test feed"
|
66
|
+
end
|
67
|
+
|
68
|
+
it "should be valid for an rss2 feed" do
|
69
|
+
@rss2.title.should == "Lift Off News"
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
describe '#description' do
|
74
|
+
it "should be valid for an atom feed" do
|
75
|
+
@atom.description.should == "Monkey test feed"
|
76
|
+
end
|
77
|
+
|
78
|
+
it "should be valid for an rss2 feed" do
|
79
|
+
@rss2.description.should == "Liftoff to Space Exploration."
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
describe '#feed_id' do
|
84
|
+
it "should be valid for an atom feed" do
|
85
|
+
@atom.feed_id.should == "tag:imaginary.host:nyheter"
|
86
|
+
end
|
87
|
+
|
88
|
+
it "should be nil for an rss2 feed" do
|
89
|
+
@rss2.feed_id.should be_nil
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
describe '#updated_at' do
|
94
|
+
it "should be valid for an atom feed" do
|
95
|
+
@atom.updated_at.should == Time.utc(2008, 3, 7, 20, 41, 10)
|
96
|
+
end
|
97
|
+
|
98
|
+
it "should be valid for an rss2 feed" do
|
99
|
+
@rss2.updated_at.should == Time.utc(2003, 6, 10, 9, 41, 1)
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
describe '#href' do
|
104
|
+
it "should be valid for an atom feed" do
|
105
|
+
@atom.href.should == "http://imaginary.host/posts.atom"
|
106
|
+
end
|
107
|
+
|
108
|
+
it "should be nil for an atom feed" do
|
109
|
+
@rss2.href.should be_nil
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
describe '#url' do
|
114
|
+
it "should be valid for an atom feed" do
|
115
|
+
@atom.url.should == "http://imaginary.host/posts"
|
116
|
+
end
|
117
|
+
|
118
|
+
it "should be valid for an rss2 feed" do
|
119
|
+
@rss2.url.should == "http://liftoff.msfc.nasa.gov/"
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
describe '#generator' do
|
124
|
+
it "should be valid for an atom feed" do
|
125
|
+
@atom.generator.should == "Roll your own"
|
126
|
+
end
|
127
|
+
|
128
|
+
it "should be valid for an rss2 feed" do
|
129
|
+
@rss2.generator.should == "Weblog Editor 2.0"
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
describe '#format' do
|
134
|
+
it "should be :atom for an atom feed" do
|
135
|
+
@atom.format.should == :atom
|
136
|
+
end
|
137
|
+
|
138
|
+
it "should be :rss2 for an rss2 feed" do
|
139
|
+
@rss2.format.should == :rss2
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
describe '#author.name' do
|
144
|
+
it "should be valid for an atom feed" do
|
145
|
+
@atom.author.name.should == "Frank"
|
146
|
+
end
|
147
|
+
|
148
|
+
it "should be valid for an rss2 feed" do
|
149
|
+
@rss2.author.name.should == "Mary Jo"
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
describe '#author.email' do
|
154
|
+
it "should be valid for an atom feed" do
|
155
|
+
@atom.author.email.should == "frank@imaginary.host"
|
156
|
+
end
|
157
|
+
|
158
|
+
it "should be valid for an rss2 feed" do
|
159
|
+
@rss2.author.email.should == "editor@example.com"
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
describe '#author.uri' do
|
164
|
+
it "should be valid for an atom feed" do
|
165
|
+
@atom.author.uri.should == "http://imaginary.host/students/frank"
|
166
|
+
end
|
167
|
+
|
168
|
+
it "should be nil for an rss2 feed" do
|
169
|
+
@rss2.author.uri.should be_nil
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
describe '#entries' do
|
174
|
+
it "should return an array of entries for an atom feed" do
|
175
|
+
@atom.entries.should be_an_instance_of(Array)
|
176
|
+
end
|
177
|
+
|
178
|
+
it "should have the correct length for an atom feed" do
|
179
|
+
@atom.should have(3).entries
|
180
|
+
end
|
181
|
+
|
182
|
+
it "should return items that are properly parsed for an atom feed" do
|
183
|
+
@atom.entries.first.title.should == "First title"
|
184
|
+
@atom.entries.first.url.should == "http://imaginary.host/posts/3"
|
185
|
+
end
|
186
|
+
|
187
|
+
it "should return an array of entries for an rss2 feed" do
|
188
|
+
@rss2.entries.should be_an_instance_of(Array)
|
189
|
+
end
|
190
|
+
|
191
|
+
it "should have the correct length for an rss2 feed" do
|
192
|
+
@rss2.should have(4).entries
|
193
|
+
end
|
194
|
+
|
195
|
+
it "should return items that are properly parsed for an rss2 feed" do
|
196
|
+
@rss2.entries.first.title.should == "Star City"
|
197
|
+
@rss2.entries.first.url.should == "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
|
198
|
+
@rss2.entries.first.item_id.should == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
|
199
|
+
end
|
200
|
+
|
201
|
+
it "should allow items to be read more than once" do
|
202
|
+
item = @rss2.entries.first
|
203
|
+
item.item_id.should == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
|
204
|
+
item.item_id.should == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
describe '#to_hash' do
|
209
|
+
it "should serialize the title of an atom feed" do
|
210
|
+
@atom.to_hash[:title].should == "Test feed"
|
211
|
+
end
|
212
|
+
|
213
|
+
it "should serialize the description of an atom feed" do
|
214
|
+
@atom.to_hash[:description].should == "Monkey test feed"
|
215
|
+
end
|
216
|
+
|
217
|
+
it "should serialize the feed_id of an atom feed" do
|
218
|
+
@atom.to_hash[:feed_id].should == "tag:imaginary.host:nyheter"
|
219
|
+
end
|
220
|
+
|
221
|
+
it "should serialize the updated_at time of an atom feed" do
|
222
|
+
@atom.to_hash[:updated_at].should == Time.utc(2008, 3, 7, 20, 41, 10)
|
223
|
+
end
|
224
|
+
|
225
|
+
it "should serialize the href of an atom feed" do
|
226
|
+
@atom.to_hash[:href].should == "http://imaginary.host/posts.atom"
|
227
|
+
end
|
228
|
+
|
229
|
+
it "should serialize the url of an atom feed" do
|
230
|
+
@atom.to_hash[:url].should == "http://imaginary.host/posts"
|
231
|
+
end
|
232
|
+
|
233
|
+
it "should serialize the generator of an atom feed" do
|
234
|
+
@atom.to_hash[:generator].should == "Roll your own"
|
235
|
+
end
|
236
|
+
|
237
|
+
it "should serialize the entries of an atom feed" do
|
238
|
+
@atom.to_hash[:entries].should be_an_instance_of(Array)
|
239
|
+
@atom.to_hash[:entries].first.title.should == "First title"
|
240
|
+
@atom.to_hash[:entries].first.url.should == "http://imaginary.host/posts/3"
|
241
|
+
end
|
242
|
+
|
243
|
+
it "should serialize the author of an atom feed" do
|
244
|
+
author = @atom.to_hash[:author]
|
245
|
+
|
246
|
+
author.name.should == "Frank"
|
247
|
+
author.email.should == "frank@imaginary.host"
|
248
|
+
author.uri.should == "http://imaginary.host/students/frank"
|
249
|
+
end
|
250
|
+
|
251
|
+
it "should serialize the title of an rss2 feed" do
|
252
|
+
@rss2.to_hash[:title].should == "Lift Off News"
|
253
|
+
end
|
254
|
+
|
255
|
+
it "should serialize the description of an rss2 feed" do
|
256
|
+
@rss2.to_hash[:description].should == "Liftoff to Space Exploration."
|
257
|
+
end
|
258
|
+
|
259
|
+
it "should serialize the feed_id of an rss2 feed" do
|
260
|
+
@rss2.to_hash[:feed_id].should be_nil
|
261
|
+
end
|
262
|
+
|
263
|
+
it "should serialize the updated_at time of an rss2 feed" do
|
264
|
+
@rss2.to_hash[:updated_at].should == Time.utc(2003, 6, 10, 9, 41, 1)
|
265
|
+
end
|
266
|
+
|
267
|
+
it "should serialize the href of an rss2 feed" do
|
268
|
+
@rss2.to_hash[:href].should be_nil
|
269
|
+
end
|
270
|
+
|
271
|
+
it "should serialize the url of an rss2 feed" do
|
272
|
+
@rss2.to_hash[:url].should == "http://liftoff.msfc.nasa.gov/"
|
273
|
+
end
|
274
|
+
|
275
|
+
it "should serialize the generator of an rss2 feed" do
|
276
|
+
@rss2.to_hash[:generator].should == "Weblog Editor 2.0"
|
277
|
+
end
|
278
|
+
|
279
|
+
it "should serialize the entries of an rss2 feed" do
|
280
|
+
@rss2.to_hash[:entries].should be_an_instance_of(Array)
|
281
|
+
@rss2.to_hash[:entries].first.title.should == "Star City"
|
282
|
+
@rss2.to_hash[:entries].first.url.should == "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
|
283
|
+
end
|
284
|
+
|
285
|
+
it "should serialize the author of an rss2 feed" do
|
286
|
+
|
287
|
+
author = @rss2.to_hash[:author]
|
288
|
+
|
289
|
+
author.name.should == "Mary Jo"
|
290
|
+
author.email.should == "editor@example.com"
|
291
|
+
author.uri.should be_nil
|
292
|
+
end
|
293
|
+
end
|
294
|
+
|
295
|
+
end
|
@@ -0,0 +1,188 @@
|
|
1
|
+
require File.join( File.dirname(__FILE__), "spec_helper" )
|
2
|
+
|
3
|
+
require 'feed_me'
|
4
|
+
|
5
|
+
describe FeedMe::ItemParser do
|
6
|
+
|
7
|
+
before :each do
|
8
|
+
@atom_feed = FeedMe::FeedParser.open(fixture('welformed.atom'))
|
9
|
+
@atom = FeedMe::ItemParser.build(@atom_feed.root_node.search('/entry').first, :atom, @atom_feed)
|
10
|
+
@rss2_feed = FeedMe::FeedParser.open(fixture('welformed.rss2'))
|
11
|
+
@rss2 = FeedMe::ItemParser.build(@rss2_feed.root_node.search('/item').first, :rss2, @rss2_feed)
|
12
|
+
end
|
13
|
+
|
14
|
+
describe '#to_hash' do
|
15
|
+
it "should serialize the parsed properties to a hash" do
|
16
|
+
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
describe '#title' do
|
21
|
+
it "should be valid for an atom feed" do
|
22
|
+
@atom.title.should == "First title"
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should be valid for an rss2 feed" do
|
26
|
+
@rss2.title.should == "Star City"
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
describe '#content' do
|
31
|
+
it "should be valid for an atom feed" do
|
32
|
+
@atom.content.should == "Here be content"
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should be valid for an rss2 feed" do
|
36
|
+
@rss2.content.should == "This is content"
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
describe '#item_id' do
|
41
|
+
it "should be valid for an atom feed" do
|
42
|
+
@atom.item_id.should == "tag:imaginary.host,2008-03-07:nyheter/3"
|
43
|
+
end
|
44
|
+
|
45
|
+
it "should be valid for an rss2 feed" do
|
46
|
+
@rss2.item_id.should == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
describe '#updated_at' do
|
51
|
+
it "should be valid for an atom feed" do
|
52
|
+
@atom.updated_at.should == Time.utc( 2008, 3, 7, 20, 41, 10 )
|
53
|
+
end
|
54
|
+
|
55
|
+
it "should be valid for an rss2 feed" do
|
56
|
+
@rss2.updated_at.should == Time.utc(2003, 6, 3, 9, 39, 21)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
describe '#url' do
|
61
|
+
it "should be valid for an atom feed" do
|
62
|
+
@atom.url.should == "http://imaginary.host/posts/3"
|
63
|
+
end
|
64
|
+
|
65
|
+
it "should be valid for an rss2 feed" do
|
66
|
+
@rss2.url.should == "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
describe '#format' do
|
71
|
+
it "should be :atom for an atom feed" do
|
72
|
+
@atom.format.should == :atom
|
73
|
+
end
|
74
|
+
|
75
|
+
it "should be :rss2 for an rss2 feed" do
|
76
|
+
@rss2.format.should == :rss2
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
describe '#categories' do
|
81
|
+
it "should be correct for an rss2 feed" do
|
82
|
+
@rss2.categories.should == ['news', 'chuck']
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
describe '#author.name' do
|
87
|
+
it "should be valid for an atom feed" do
|
88
|
+
@atom.author.name.should == "Jonas Nicklas"
|
89
|
+
end
|
90
|
+
|
91
|
+
it "should be valid for an rss2 feed" do
|
92
|
+
@rss2.author.name.should == "Chuck Norris"
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
describe '#author.email' do
|
97
|
+
it "should be valid for an atom feed" do
|
98
|
+
@atom.author.email.should == "jonas.nicklas@imaginary.host"
|
99
|
+
end
|
100
|
+
|
101
|
+
it "should be valid for an rss2 feed" do
|
102
|
+
@rss2.author.email.should == "da_man@example.com"
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
describe '#author.uri' do
|
107
|
+
it "should be valid for an atom feed" do
|
108
|
+
@atom.author.uri.should == "http://imaginary.host/students/jnicklas"
|
109
|
+
end
|
110
|
+
|
111
|
+
it "should be nil for an rss2 feed" do
|
112
|
+
@rss2.author.uri.should be_nil
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
describe '#to_hash' do
|
117
|
+
|
118
|
+
it "should serialize the title for an atom feed" do
|
119
|
+
@atom.to_hash[:title].should == "First title"
|
120
|
+
end
|
121
|
+
|
122
|
+
it "should serialize the item_id for an atom feed" do
|
123
|
+
@atom.to_hash[:item_id].should == "tag:imaginary.host,2008-03-07:nyheter/3"
|
124
|
+
end
|
125
|
+
|
126
|
+
it "should serialize updated_at for an atom feed" do
|
127
|
+
@atom.to_hash[:updated_at].should == Time.utc( 2008, 3, 7, 20, 41, 10 )
|
128
|
+
end
|
129
|
+
|
130
|
+
it "should serialize the url for an atom feed" do
|
131
|
+
@atom.to_hash[:url].should == "http://imaginary.host/posts/3"
|
132
|
+
end
|
133
|
+
|
134
|
+
it "should serialize the author of an atom feed" do
|
135
|
+
author = @atom.to_hash[:author]
|
136
|
+
|
137
|
+
author.name.should == "Jonas Nicklas"
|
138
|
+
author.email.should == "jonas.nicklas@imaginary.host"
|
139
|
+
author.uri.should == "http://imaginary.host/students/jnicklas"
|
140
|
+
end
|
141
|
+
|
142
|
+
it "should serialize the title for an rss2 feed" do
|
143
|
+
@rss2.to_hash[:title].should == "Star City"
|
144
|
+
end
|
145
|
+
|
146
|
+
it "should serialize the item_id for an rss2 feed" do
|
147
|
+
@rss2.to_hash[:item_id].should == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
|
148
|
+
end
|
149
|
+
|
150
|
+
it "should serialize updated_at for an rss2 feed" do
|
151
|
+
@rss2.to_hash[:updated_at].should == Time.utc(2003, 6, 3, 9, 39, 21)
|
152
|
+
end
|
153
|
+
|
154
|
+
it "should serialize the url for an rss2 feed" do
|
155
|
+
@rss2.to_hash[:url].should == "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
|
156
|
+
end
|
157
|
+
|
158
|
+
it "should serialize the author of an rss2 feed" do
|
159
|
+
author = @rss2.to_hash[:author]
|
160
|
+
|
161
|
+
author.name.should == "Chuck Norris"
|
162
|
+
author.email.should == "da_man@example.com"
|
163
|
+
author.uri.should be_nil
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
end
|
168
|
+
|
169
|
+
describe "Without an author", FeedMe::ItemParser do
|
170
|
+
|
171
|
+
before :each do
|
172
|
+
@atom_feed = FeedMe::FeedParser.open(fixture('welformed.atom'))
|
173
|
+
@atom = FeedMe::ItemParser.build(@atom_feed.root_node.search('/entry')[1], :atom, @atom_feed)
|
174
|
+
@rss2_feed = FeedMe::FeedParser.open(fixture('welformed.rss2'))
|
175
|
+
@rss2 = FeedMe::ItemParser.build(@rss2_feed.root_node.search('/item')[1], :rss2, @rss2_feed)
|
176
|
+
end
|
177
|
+
|
178
|
+
describe '#author.name' do
|
179
|
+
it "should be valid for an atom feed" do
|
180
|
+
@atom.author.name.should be_nil
|
181
|
+
end
|
182
|
+
|
183
|
+
it "should be valid for an rss2 feed" do
|
184
|
+
@rss2.author.name.should be_nil
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require File.join( File.dirname(__FILE__), "spec_helper" )
|
2
|
+
|
3
|
+
require 'feed_me'
|
4
|
+
|
5
|
+
describe FeedMe::SimpleStruct do
|
6
|
+
|
7
|
+
it "should append methods" do
|
8
|
+
struct = FeedMe::SimpleStruct.new(:foo => "blah", :bar => 23)
|
9
|
+
|
10
|
+
struct.foo.should == "blah"
|
11
|
+
struct.bar.should == 23
|
12
|
+
end
|
13
|
+
|
14
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
$TESTING=true
|
2
|
+
$:.push File.join(File.dirname(__FILE__), '..', 'lib')
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
# require 'ruby-debug'
|
6
|
+
|
7
|
+
module Fixtures
|
8
|
+
|
9
|
+
def fixture_path
|
10
|
+
File.join(File.dirname(__FILE__), 'fixtures')
|
11
|
+
end
|
12
|
+
|
13
|
+
def fixture(name)
|
14
|
+
File.join(fixture_path, name)
|
15
|
+
end
|
16
|
+
|
17
|
+
def hpricot_fixture(name)
|
18
|
+
Hpricot.XML(open(fixture(name)).read)
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
|
23
|
+
Spec::Runner.configure do |config|
|
24
|
+
config.include(Fixtures)
|
25
|
+
end
|
metadata
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: feed_me
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.5
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jonas Nicklas
|
8
|
+
- Jonathan Stott
|
9
|
+
autorequire: feed_me
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2009-11-06 00:00:00 +00:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: hpricot
|
18
|
+
type: :runtime
|
19
|
+
version_requirement:
|
20
|
+
version_requirements: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - ">="
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: "0"
|
25
|
+
version:
|
26
|
+
description: Nice and simple RSS and atom feed parsing built on hpricot
|
27
|
+
email: jonas.nicklas@gmail.com
|
28
|
+
executables: []
|
29
|
+
|
30
|
+
extensions: []
|
31
|
+
|
32
|
+
extra_rdoc_files:
|
33
|
+
- LICENSE
|
34
|
+
- README
|
35
|
+
- TODO
|
36
|
+
files:
|
37
|
+
- LICENSE
|
38
|
+
- README
|
39
|
+
- Rakefile
|
40
|
+
- TODO
|
41
|
+
- lib/feed_me.rb
|
42
|
+
- lib/feed_me/abstract_parser.rb
|
43
|
+
- lib/feed_me/consts.rb
|
44
|
+
- lib/feed_me/feed_parser.rb
|
45
|
+
- lib/feed_me/feed_struct.rb
|
46
|
+
- lib/feed_me/item_parser.rb
|
47
|
+
- lib/feed_me/merbtasks.rb
|
48
|
+
- lib/feed_me/simple_struct.rb
|
49
|
+
has_rdoc: true
|
50
|
+
homepage: http://github.com/jnicklas/feed_me
|
51
|
+
licenses: []
|
52
|
+
|
53
|
+
post_install_message:
|
54
|
+
rdoc_options:
|
55
|
+
- --charset=UTF-8
|
56
|
+
require_paths:
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: "0"
|
63
|
+
version:
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: "0"
|
69
|
+
version:
|
70
|
+
requirements: []
|
71
|
+
|
72
|
+
rubyforge_project:
|
73
|
+
rubygems_version: 1.3.5
|
74
|
+
signing_key:
|
75
|
+
specification_version: 3
|
76
|
+
summary: Nice and simple RSS and atom feed parsing built on hpricot
|
77
|
+
test_files:
|
78
|
+
- spec/feed_parser_spec.rb
|
79
|
+
- spec/item_parser_spec.rb
|
80
|
+
- spec/simple_struct_spec.rb
|
81
|
+
- spec/spec_helper.rb
|