feed_me 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Jonas Nicklas
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,23 @@
1
+ Feed Me
2
+ =======
3
+
4
+ Feed Me is a simple parser for RSS2 and Atom feeds; adding other feed formats should be trivial. Feed Me is pretty minimal and basically only does translation/cleanup from different feed formats to a consistent API. It is designed to be minimal.
5
+
6
+ Feed Me is built on the excellent Hpricot parser written by _why the lucky stiff.
7
+
8
+ Use it like this:
9
+
10
+ file = File.read('some_feed.atom')
11
+
12
+ feed = FeedMe.parse(file)
13
+
14
+ feed.title
15
+ feed.author.name
16
+ feed.entries.each do |entry|
17
+ entry.title
18
+ entry.content
19
+ end
20
+
21
+ Check out the specs or lib/feed_me/consts.rb for the complete API.
22
+
23
+ DISCLAIMER: This is very much alpha software, use at your own risk!
data/Rakefile ADDED
@@ -0,0 +1,59 @@
1
+ require 'rubygems'
2
+ require 'rake/gempackagetask'
3
+ require 'spec/rake/spectask'
4
+
5
+ file_list = FileList['spec/*_spec.rb']
6
+
7
+ namespace :spec do
8
+ desc "Run all examples with RCov"
9
+ Spec::Rake::SpecTask.new('rcov') do |t|
10
+ t.spec_files = file_list
11
+ t.rcov = true
12
+ t.rcov_dir = "doc/coverage"
13
+ t.rcov_opts = ['--exclude', 'spec']
14
+ end
15
+
16
+ desc "Generate an html report"
17
+ Spec::Rake::SpecTask.new('report') do |t|
18
+ t.spec_files = file_list
19
+ t.rcov_opts = ['--exclude', 'spec']
20
+ t.spec_opts = ["--format", "html:doc/reports/specs.html"]
21
+ t.fail_on_error = false
22
+ end
23
+
24
+ end
25
+
26
+ desc 'Default: run specs.'
27
+ task :default => 'spec:rcov'
28
+
29
+ PLUGIN = "feed_me"
30
+ NAME = "feed_me"
31
+ AUTHOR = ["Jonas Nicklas", "Jonathan Stott"]
32
+ EMAIL = "jonas.nicklas@gmail.com"
33
+ HOMEPAGE = "http://github.com/jnicklas/feed_me"
34
+ SUMMARY = "Nice and simple RSS and atom feed parsing built on hpricot"
35
+
36
+ spec = Gem::Specification.new do |s|
37
+ end
38
+
39
+ begin
40
+ require 'jeweler'
41
+ Jeweler::Tasks.new do |s|
42
+ s.name = NAME
43
+ s.platform = Gem::Platform::RUBY
44
+ s.has_rdoc = true
45
+ s.extra_rdoc_files = ["README", "LICENSE", 'TODO']
46
+ s.summary = SUMMARY
47
+ s.description = s.summary
48
+ s.authors = AUTHOR
49
+ s.email = EMAIL
50
+ s.homepage = HOMEPAGE
51
+ s.require_path = 'lib'
52
+ s.autorequire = PLUGIN
53
+ s.add_dependency('hpricot')
54
+ s.files = %w(LICENSE README Rakefile TODO) + Dir.glob("{lib,specs}/**/*")
55
+ end
56
+ Jeweler::GemcutterTasks.new
57
+ rescue
58
+ puts "Jeweler, or one of its dependencies, is not available. Install it with: sudo gem install namelessjon-jeweler -s http://gems.github.com"
59
+ end
data/TODO ADDED
@@ -0,0 +1,2 @@
1
+ TODO:
2
+ - Add categories to atom feeds
data/lib/feed_me.rb ADDED
@@ -0,0 +1,36 @@
1
+ # make sure we're running inside Merb
2
+ if defined?(Merb::Plugins)
3
+ dependency 'hpricot'
4
+ else
5
+ require 'rubygems'
6
+ require 'hpricot'
7
+ end
8
+ require 'time'
9
+
10
+ unless nil.respond_to? :try
11
+ # the ultimate duck
12
+ class Object
13
+ def try(method, *args)
14
+ self.send(method, *args)
15
+ rescue NoMethodError
16
+ nil
17
+ end
18
+ end
19
+ end
20
+
21
+ module FeedMe
22
+ class InvalidFeedFormat < StandardError ; end
23
+
24
+ def self.parse(feed)
25
+ FeedMe::FeedParser.parse(feed)
26
+ end
27
+
28
+ def self.open(file)
29
+ FeedMe::FeedParser.parse(file)
30
+ end
31
+ end
32
+
33
+ ['consts', 'abstract_parser', 'feed_struct', 'simple_struct',
34
+ 'feed_parser', 'item_parser'].each do |f|
35
+ require File.join(File.dirname(__FILE__), 'feed_me', f)
36
+ end
@@ -0,0 +1,119 @@
1
+ class FeedMe::AbstractParser
2
+
3
+ class << self
4
+
5
+ attr_accessor :properties, :root_nodes
6
+
7
+ def build(xml, format, *args)
8
+ # in a world with activesupport this would have been written as
9
+ # format_parser = (format.to_s.camelize + self.to_s).constantize
10
+ camelized_format = format.to_s.split('_').map{ |w| w.capitalize }.join('')
11
+ bare_class = self.to_s.split('::').last
12
+
13
+ begin
14
+ format_parser = FeedMe.const_get(camelized_format + bare_class)
15
+ rescue NameError
16
+ end
17
+
18
+ if format_parser.is_a?(Class) and format_parser.ancestors.include?(self)
19
+ return format_parser.new(xml, format, *args)
20
+ else
21
+ return self.new(xml, format, *args)
22
+ end
23
+
24
+ end
25
+
26
+ end
27
+
28
+ def initialize(xml, format)
29
+ self.xml = xml
30
+ self.format = format
31
+ self.properties = self.class.properties[self.format]
32
+
33
+ append_methods
34
+ end
35
+
36
+ def to_hash
37
+ hash = {}
38
+ self.properties.each do |method, p|
39
+ hash[method] = self.send(method)
40
+ end
41
+ return hash
42
+ end
43
+
44
+ attr_accessor :xml, :format, :properties
45
+
46
+ alias_method :root_node, :xml
47
+
48
+ protected
49
+
50
+ def fetch_rss_person(selector)
51
+ item = fetch(selector)
52
+ if(item)
53
+ email, name = item.split(/\s+/, 2)
54
+ name = name.match( /\((.*?)\)/ ).to_a[1] if name # strip parentheses
55
+ else
56
+ name, email = nil
57
+ end
58
+ FeedMe::SimpleStruct.new(:email => email, :name => name, :uri => nil)
59
+ end
60
+
61
+ def append_methods
62
+ self.properties.each do |method, p|
63
+ unless respond_to?(method)
64
+ block = get_proc_for_property(method, p)
65
+ # meta programming magic
66
+ (class << self; self; end).module_eval do
67
+ define_method method, &block
68
+ end
69
+ end
70
+ end
71
+ end
72
+
73
+ def get_proc_for_property(method, p)
74
+ if p.class == Array
75
+ return caching_proc(method, &proc { fetch("/#{p[0]}", root_node, p[1].to_sym) })
76
+ elsif p.class == Hash
77
+ return caching_proc(method, &proc { FeedMe::FeedStruct.new(root_node, p) })
78
+ elsif p != :undefined
79
+ return caching_proc(method, &proc { fetch("/#{p}", root_node) })
80
+ else
81
+ return proc { nil }
82
+ end
83
+ end
84
+
85
+ def caching_proc(name, &block)
86
+ proc do
87
+ ivar = instance_variable_get("@#{name}")
88
+ unless ivar
89
+ result = yield
90
+ instance_variable_set("@#{name}", result)
91
+ return result
92
+ end
93
+ ivar
94
+ end
95
+ end
96
+
97
+ def fetch(selector, search_in = xml, method = :inner_html)
98
+ item = search_in.search(selector)
99
+
100
+ unless method == :array
101
+ self.try("extract_" + method.to_s, item.first) unless item.empty?
102
+ else
103
+ item.map { |i| self.try("extract_inner_html", i) }
104
+ end
105
+ end
106
+
107
+ def extract_inner_html(item)
108
+ item.inner_html
109
+ end
110
+
111
+ def extract_href(item)
112
+ item[:href]
113
+ end
114
+
115
+ def extract_time(item)
116
+ Time.parse(item.inner_html).utc
117
+ end
118
+
119
+ end
@@ -0,0 +1,69 @@
1
+ module FeedMe
2
+
3
+ ROOT_NODES = {
4
+ :atom => "//feed[@xmlns='http://www.w3.org/2005/Atom']",
5
+ :rss2 => "//rss[@version=2.0]/channel"
6
+ }
7
+
8
+ FEED_PROPERTIES = {
9
+ :atom => {
10
+ :title => :title,
11
+ :updated_at => [:updated, :time],
12
+ :feed_id => :id,
13
+ :url => ["link[@rel=alternate]", :href],
14
+ :href => ["link[@rel=self]", :href],
15
+ :description => :subtitle,
16
+ :generator => :generator,
17
+ :author => {
18
+ :email => 'author/email',
19
+ :name => 'author/name',
20
+ :uri => 'author/uri'
21
+ },
22
+ :entries => :special
23
+ },
24
+ :rss2 => {
25
+ :title => :title,
26
+ :updated_at => [:lastBuildDate, :time],
27
+ :feed_id => :undefined,
28
+ :url => :link,
29
+ :href => :undefined,
30
+ :description => :description,
31
+ :generator => :generator,
32
+ :author => :special,
33
+ :entries => :special
34
+ }
35
+ }
36
+
37
+ ITEM_PROPERTIES = {
38
+ :atom => {
39
+ :title => :title,
40
+ :updated_at => [:updated, :time],
41
+ :item_id => :id,
42
+ :url => ["link[@rel=alternate]", :href],
43
+ :content => :content,
44
+ :author => {
45
+ :email => 'author/email',
46
+ :name => 'author/name',
47
+ :uri => 'author/uri'
48
+ }
49
+ },
50
+ :rss2 => {
51
+ :title => :title,
52
+ :updated_at => [:pubDate, :time],
53
+ :item_id => :guid,
54
+ :url => :link,
55
+ :content => :description,
56
+ :author => :special,
57
+ :categories => [:category, :array]
58
+ }
59
+ }
60
+
61
+ AUTHOR_PROPERTIES = {
62
+ :atom => {
63
+ :name => :name,
64
+ :uri => :uri,
65
+ :email => :email
66
+ }
67
+ }
68
+
69
+ end
@@ -0,0 +1,59 @@
1
+ module FeedMe
2
+
3
+ class FeedParser < AbstractParser
4
+
5
+ self.properties = FEED_PROPERTIES
6
+
7
+ class << self
8
+
9
+ def open(file)
10
+ self.parse(Kernel.open(file).read)
11
+ end
12
+
13
+ # parses the passed feed and identifies what kind of feed it is
14
+ # then returns a parser object
15
+ def parse(feed)
16
+ xml = Hpricot.XML(feed)
17
+
18
+ root_node, format = self.identify(xml)
19
+ raise InvalidFeedFormat if format.nil?
20
+
21
+ self.build(root_node, format)
22
+ end
23
+
24
+ protected
25
+
26
+ def identify(xml)
27
+ FeedMe::ROOT_NODES.each do |f, s|
28
+ item = xml.at(s)
29
+ return item, f if item
30
+ end
31
+ end
32
+
33
+ end
34
+ end
35
+
36
+ class AtomFeedParser < FeedParser
37
+ self.properties = FEED_PROPERTIES
38
+
39
+ def entries
40
+ xml.search('entry').map do |el|
41
+ ItemParser.build(el, self.format, self)
42
+ end
43
+ end
44
+ end
45
+
46
+ class Rss2FeedParser < FeedParser
47
+ self.properties = FEED_PROPERTIES
48
+
49
+ def entries
50
+ xml.search('item').map do |el|
51
+ ItemParser.build(el, self.format, self)
52
+ end
53
+ end
54
+
55
+ def author
56
+ fetch_rss_person("managingEditor")
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,13 @@
1
+ module FeedMe
2
+
3
+ class FeedStruct < AbstractParser
4
+
5
+ def initialize(xml, properties)
6
+ self.xml = xml
7
+ self.properties = properties
8
+ append_methods
9
+ end
10
+
11
+ end
12
+
13
+ end
@@ -0,0 +1,25 @@
1
+ module FeedMe
2
+
3
+ class ItemParser < AbstractParser
4
+
5
+ self.properties = ITEM_PROPERTIES
6
+
7
+ attr_accessor :feed
8
+
9
+ def initialize(xml, format, feed)
10
+ super(xml, format)
11
+ self.feed = feed
12
+ end
13
+
14
+ end
15
+
16
+ class Rss2ItemParser < ItemParser
17
+
18
+ self.properties = ITEM_PROPERTIES
19
+
20
+ def author
21
+ fetch_rss_person("author")
22
+ end
23
+
24
+ end
25
+ end
@@ -0,0 +1,6 @@
1
+ namespace :feed_me do
2
+ desc "Do something for feed_me"
3
+ task :default do
4
+ puts "feed_me doesn't do anything"
5
+ end
6
+ end
@@ -0,0 +1,15 @@
1
+ module FeedMe
2
+
3
+ class SimpleStruct
4
+
5
+ def initialize(hash = {})
6
+ (class << self; self; end).module_eval do
7
+ hash.each do |method, result|
8
+ define_method( method ) { result }
9
+ end
10
+ end
11
+ end
12
+
13
+ end
14
+
15
+ end
@@ -0,0 +1,295 @@
1
+ require File.join( File.dirname(__FILE__), "spec_helper" )
2
+
3
+ require 'feed_me'
4
+
5
+ describe "all parsing methods", :shared => true do # shared examples; they read @atom and @rss2, which the including group must set
6
+ it "should identify an atom feed" do
7
+ @atom.should be_an_instance_of(FeedMe::AtomFeedParser)
8
+ @atom.format.should == :atom
9
+ @atom.root_node.xpath == "//feed[@xmlns='http://www.w3.org/2005/Atom']" # NOTE(review): bare == without .should - the result is discarded, so nothing is asserted here
10
+ end
11
+
12
+ it "should identify an rss2 feed" do
13
+ @rss2.should be_an_instance_of(FeedMe::Rss2FeedParser)
14
+ @rss2.format.should == :rss2
15
+ @rss2.root_node.xpath == "//rss[@version=2.0]/channel" # NOTE(review): bare == without .should - the result is discarded, so nothing is asserted here
16
+ end
17
+ end
18
+
19
+ describe FeedMe::FeedParser do
20
+
21
+ before :each do
22
+ @atom_feed = hpricot_fixture('welformed.atom') / "//feed[@xmlns='http://www.w3.org/2005/Atom']"
23
+ @atom = FeedMe::FeedParser.build(@atom_feed, :atom)
24
+ @rss2_feed = hpricot_fixture('welformed.rss2') / "//rss[@version=2.0]/channel"
25
+ @rss2 = FeedMe::FeedParser.build(@rss2_feed, :rss2)
26
+ end
27
+
28
+ it "should be an atom parser for an atom feed" do
29
+ @atom.should be_an_instance_of(FeedMe::AtomFeedParser)
30
+ end
31
+
32
+ describe ".parse" do
33
+ before(:each) do
34
+ @atom = FeedMe::FeedParser.parse(open(fixture('welformed.atom')).read)
35
+ @rss2 = FeedMe::FeedParser.parse(open(fixture('welformed.rss2')).read)
36
+ end
37
+
38
+ it_should_behave_like "all parsing methods"
39
+
40
+ describe "with bad input" do
41
+ it "should raise on an empty body" do
42
+ lambda { FeedMe::FeedParser.parse("") }.should raise_error(FeedMe::InvalidFeedFormat)
43
+ end
44
+
45
+ it "should raise on a body with non-recognised xml" do
46
+ lambda {
47
+ FeedMe::FeedParser.parse(%Q|<?xml version="1.0" encoding="UTF-8"?>"<foo>bar</foo>|)
48
+ }.should raise_error(FeedMe::InvalidFeedFormat)
49
+ end
50
+ end
51
+
52
+ end
53
+
54
+ describe ".open" do
55
+ before(:each) do
56
+ @atom = FeedMe::FeedParser.open(fixture('welformed.atom'))
57
+ @rss2 = FeedMe::FeedParser.open(fixture('welformed.rss2'))
58
+ end
59
+
60
+ it_should_behave_like "all parsing methods"
61
+ end
62
+
63
+ describe '#title' do
64
+ it "should be valid for an atom feed" do
65
+ @atom.title.should == "Test feed"
66
+ end
67
+
68
+ it "should be valid for an rss2 feed" do
69
+ @rss2.title.should == "Lift Off News"
70
+ end
71
+ end
72
+
73
+ describe '#description' do
74
+ it "should be valid for an atom feed" do
75
+ @atom.description.should == "Monkey test feed"
76
+ end
77
+
78
+ it "should be valid for an rss2 feed" do
79
+ @rss2.description.should == "Liftoff to Space Exploration."
80
+ end
81
+ end
82
+
83
+ describe '#feed_id' do
84
+ it "should be valid for an atom feed" do
85
+ @atom.feed_id.should == "tag:imaginary.host:nyheter"
86
+ end
87
+
88
+ it "should be nil for an rss2 feed" do
89
+ @rss2.feed_id.should be_nil
90
+ end
91
+ end
92
+
93
+ describe '#updated_at' do
94
+ it "should be valid for an atom feed" do
95
+ @atom.updated_at.should == Time.utc(2008, 3, 7, 20, 41, 10)
96
+ end
97
+
98
+ it "should be valid for an rss2 feed" do
99
+ @rss2.updated_at.should == Time.utc(2003, 6, 10, 9, 41, 1)
100
+ end
101
+ end
102
+
103
+ describe '#href' do
104
+ it "should be valid for an atom feed" do
105
+ @atom.href.should == "http://imaginary.host/posts.atom"
106
+ end
107
+
108
+ it "should be nil for an atom feed" do
109
+ @rss2.href.should be_nil
110
+ end
111
+ end
112
+
113
+ describe '#url' do
114
+ it "should be valid for an atom feed" do
115
+ @atom.url.should == "http://imaginary.host/posts"
116
+ end
117
+
118
+ it "should be valid for an rss2 feed" do
119
+ @rss2.url.should == "http://liftoff.msfc.nasa.gov/"
120
+ end
121
+ end
122
+
123
+ describe '#generator' do
124
+ it "should be valid for an atom feed" do
125
+ @atom.generator.should == "Roll your own"
126
+ end
127
+
128
+ it "should be valid for an rss2 feed" do
129
+ @rss2.generator.should == "Weblog Editor 2.0"
130
+ end
131
+ end
132
+
133
+ describe '#format' do
134
+ it "should be :atom for an atom feed" do
135
+ @atom.format.should == :atom
136
+ end
137
+
138
+ it "should be :rss2 for an rss2 feed" do
139
+ @rss2.format.should == :rss2
140
+ end
141
+ end
142
+
143
+ describe '#author.name' do
144
+ it "should be valid for an atom feed" do
145
+ @atom.author.name.should == "Frank"
146
+ end
147
+
148
+ it "should be valid for an rss2 feed" do
149
+ @rss2.author.name.should == "Mary Jo"
150
+ end
151
+ end
152
+
153
+ describe '#author.email' do
154
+ it "should be valid for an atom feed" do
155
+ @atom.author.email.should == "frank@imaginary.host"
156
+ end
157
+
158
+ it "should be valid for an rss2 feed" do
159
+ @rss2.author.email.should == "editor@example.com"
160
+ end
161
+ end
162
+
163
+ describe '#author.uri' do
164
+ it "should be valid for an atom feed" do
165
+ @atom.author.uri.should == "http://imaginary.host/students/frank"
166
+ end
167
+
168
+ it "should be nil for an rss2 feed" do
169
+ @rss2.author.uri.should be_nil
170
+ end
171
+ end
172
+
173
+ describe '#entries' do
174
+ it "should return an array of entries for an atom feed" do
175
+ @atom.entries.should be_an_instance_of(Array)
176
+ end
177
+
178
+ it "should have the correct length for an atom feed" do
179
+ @atom.should have(3).entries
180
+ end
181
+
182
+ it "should return items that are properly parsed for an atom feed" do
183
+ @atom.entries.first.title.should == "First title"
184
+ @atom.entries.first.url.should == "http://imaginary.host/posts/3"
185
+ end
186
+
187
+ it "should return an array of entries for an rss2 feed" do
188
+ @rss2.entries.should be_an_instance_of(Array)
189
+ end
190
+
191
+ it "should have the correct length for an rss2 feed" do
192
+ @rss2.should have(4).entries
193
+ end
194
+
195
+ it "should return items that are properly parsed for an rss2 feed" do
196
+ @rss2.entries.first.title.should == "Star City"
197
+ @rss2.entries.first.url.should == "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
198
+ @rss2.entries.first.item_id.should == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
199
+ end
200
+
201
+ it "should allow items to be read more than once" do
202
+ item = @rss2.entries.first
203
+ item.item_id.should == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
204
+ item.item_id.should == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
205
+ end
206
+ end
207
+
208
+ describe '#to_hash' do
209
+ it "should serialize the title of an atom feed" do
210
+ @atom.to_hash[:title].should == "Test feed"
211
+ end
212
+
213
+ it "should serialize the description of an atom feed" do
214
+ @atom.to_hash[:description].should == "Monkey test feed"
215
+ end
216
+
217
+ it "should serialize the feed_id of an atom feed" do
218
+ @atom.to_hash[:feed_id].should == "tag:imaginary.host:nyheter"
219
+ end
220
+
221
+ it "should serialize the updated_at time of an atom feed" do
222
+ @atom.to_hash[:updated_at].should == Time.utc(2008, 3, 7, 20, 41, 10)
223
+ end
224
+
225
+ it "should serialize the href of an atom feed" do
226
+ @atom.to_hash[:href].should == "http://imaginary.host/posts.atom"
227
+ end
228
+
229
+ it "should serialize the url of an atom feed" do
230
+ @atom.to_hash[:url].should == "http://imaginary.host/posts"
231
+ end
232
+
233
+ it "should serialize the generator of an atom feed" do
234
+ @atom.to_hash[:generator].should == "Roll your own"
235
+ end
236
+
237
+ it "should serialize the entries of an atom feed" do
238
+ @atom.to_hash[:entries].should be_an_instance_of(Array)
239
+ @atom.to_hash[:entries].first.title.should == "First title"
240
+ @atom.to_hash[:entries].first.url.should == "http://imaginary.host/posts/3"
241
+ end
242
+
243
+ it "should serialize the author of an atom feed" do
244
+ author = @atom.to_hash[:author]
245
+
246
+ author.name.should == "Frank"
247
+ author.email.should == "frank@imaginary.host"
248
+ author.uri.should == "http://imaginary.host/students/frank"
249
+ end
250
+
251
+ it "should serialize the title of an rss2 feed" do
252
+ @rss2.to_hash[:title].should == "Lift Off News"
253
+ end
254
+
255
+ it "should serialize the description of an rss2 feed" do
256
+ @rss2.to_hash[:description].should == "Liftoff to Space Exploration."
257
+ end
258
+
259
+ it "should serialize the feed_id of an rss2 feed" do
260
+ @rss2.to_hash[:feed_id].should be_nil
261
+ end
262
+
263
+ it "should serialize the updated_at time of an rss2 feed" do
264
+ @rss2.to_hash[:updated_at].should == Time.utc(2003, 6, 10, 9, 41, 1)
265
+ end
266
+
267
+ it "should serialize the href of an rss2 feed" do
268
+ @rss2.to_hash[:href].should be_nil
269
+ end
270
+
271
+ it "should serialize the url of an rss2 feed" do
272
+ @rss2.to_hash[:url].should == "http://liftoff.msfc.nasa.gov/"
273
+ end
274
+
275
+ it "should serialize the generator of an rss2 feed" do
276
+ @rss2.to_hash[:generator].should == "Weblog Editor 2.0"
277
+ end
278
+
279
+ it "should serialize the entries of an rss2 feed" do
280
+ @rss2.to_hash[:entries].should be_an_instance_of(Array)
281
+ @rss2.to_hash[:entries].first.title.should == "Star City"
282
+ @rss2.to_hash[:entries].first.url.should == "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
283
+ end
284
+
285
+ it "should serialize the author of an rss2 feed" do
286
+
287
+ author = @rss2.to_hash[:author]
288
+
289
+ author.name.should == "Mary Jo"
290
+ author.email.should == "editor@example.com"
291
+ author.uri.should be_nil
292
+ end
293
+ end
294
+
295
+ end
@@ -0,0 +1,188 @@
1
+ require File.join( File.dirname(__FILE__), "spec_helper" )
2
+
3
+ require 'feed_me'
4
+
5
+ describe FeedMe::ItemParser do
6
+
7
+ before :each do
8
+ @atom_feed = FeedMe::FeedParser.open(fixture('welformed.atom'))
9
+ @atom = FeedMe::ItemParser.build(@atom_feed.root_node.search('/entry').first, :atom, @atom_feed)
10
+ @rss2_feed = FeedMe::FeedParser.open(fixture('welformed.rss2'))
11
+ @rss2 = FeedMe::ItemParser.build(@rss2_feed.root_node.search('/item').first, :rss2, @rss2_feed)
12
+ end
13
+
14
+ describe '#to_hash' do
15
+ it "should serialize the parsed properties to a hash" do
16
+
17
+ end
18
+ end
19
+
20
+ describe '#title' do
21
+ it "should be valid for an atom feed" do
22
+ @atom.title.should == "First title"
23
+ end
24
+
25
+ it "should be valid for an rss2 feed" do
26
+ @rss2.title.should == "Star City"
27
+ end
28
+ end
29
+
30
+ describe '#content' do
31
+ it "should be valid for an atom feed" do
32
+ @atom.content.should == "Here be content"
33
+ end
34
+
35
+ it "should be valid for an rss2 feed" do
36
+ @rss2.content.should == "This is content"
37
+ end
38
+ end
39
+
40
+ describe '#item_id' do
41
+ it "should be valid for an atom feed" do
42
+ @atom.item_id.should == "tag:imaginary.host,2008-03-07:nyheter/3"
43
+ end
44
+
45
+ it "should be valid for an rss2 feed" do
46
+ @rss2.item_id.should == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
47
+ end
48
+ end
49
+
50
+ describe '#updated_at' do
51
+ it "should be valid for an atom feed" do
52
+ @atom.updated_at.should == Time.utc( 2008, 3, 7, 20, 41, 10 )
53
+ end
54
+
55
+ it "should be valid for an rss2 feed" do
56
+ @rss2.updated_at.should == Time.utc(2003, 6, 3, 9, 39, 21)
57
+ end
58
+ end
59
+
60
+ describe '#url' do
61
+ it "should be valid for an atom feed" do
62
+ @atom.url.should == "http://imaginary.host/posts/3"
63
+ end
64
+
65
+ it "should be valid for an rss2 feed" do
66
+ @rss2.url.should == "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
67
+ end
68
+ end
69
+
70
+ describe '#format' do
71
+ it "should be :atom for an atom feed" do
72
+ @atom.format.should == :atom
73
+ end
74
+
75
+ it "should be :rss2 for an rss2 feed" do
76
+ @rss2.format.should == :rss2
77
+ end
78
+ end
79
+
80
+ describe '#categories' do
81
+ it "should be correct for an rss2 feed" do
82
+ @rss2.categories.should == ['news', 'chuck']
83
+ end
84
+ end
85
+
86
+ describe '#author.name' do
87
+ it "should be valid for an atom feed" do
88
+ @atom.author.name.should == "Jonas Nicklas"
89
+ end
90
+
91
+ it "should be valid for an rss2 feed" do
92
+ @rss2.author.name.should == "Chuck Norris"
93
+ end
94
+ end
95
+
96
+ describe '#author.email' do
97
+ it "should be valid for an atom feed" do
98
+ @atom.author.email.should == "jonas.nicklas@imaginary.host"
99
+ end
100
+
101
+ it "should be valid for an rss2 feed" do
102
+ @rss2.author.email.should == "da_man@example.com"
103
+ end
104
+ end
105
+
106
+ describe '#author.uri' do
107
+ it "should be valid for an atom feed" do
108
+ @atom.author.uri.should == "http://imaginary.host/students/jnicklas"
109
+ end
110
+
111
+ it "should be nil for an rss2 feed" do
112
+ @rss2.author.uri.should be_nil
113
+ end
114
+ end
115
+
116
+ describe '#to_hash' do
117
+
118
+ it "should serialize the title for an atom feed" do
119
+ @atom.to_hash[:title].should == "First title"
120
+ end
121
+
122
+ it "should serialize the item_id for an atom feed" do
123
+ @atom.to_hash[:item_id].should == "tag:imaginary.host,2008-03-07:nyheter/3"
124
+ end
125
+
126
+ it "should serialize updated_at for an atom feed" do
127
+ @atom.to_hash[:updated_at].should == Time.utc( 2008, 3, 7, 20, 41, 10 )
128
+ end
129
+
130
+ it "should serialize the url for an atom feed" do
131
+ @atom.to_hash[:url].should == "http://imaginary.host/posts/3"
132
+ end
133
+
134
+ it "should serialize the author of an atom feed" do
135
+ author = @atom.to_hash[:author]
136
+
137
+ author.name.should == "Jonas Nicklas"
138
+ author.email.should == "jonas.nicklas@imaginary.host"
139
+ author.uri.should == "http://imaginary.host/students/jnicklas"
140
+ end
141
+
142
+ it "should serialize the title for an rss2 feed" do
143
+ @rss2.to_hash[:title].should == "Star City"
144
+ end
145
+
146
+ it "should serialize the item_id for an rss2 feed" do
147
+ @rss2.to_hash[:item_id].should == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
148
+ end
149
+
150
+ it "should serialize updated_at for an rss2 feed" do
151
+ @rss2.to_hash[:updated_at].should == Time.utc(2003, 6, 3, 9, 39, 21)
152
+ end
153
+
154
+ it "should serialize the url for an rss2 feed" do
155
+ @rss2.to_hash[:url].should == "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
156
+ end
157
+
158
+ it "should serialize the author of an rss2 feed" do
159
+ author = @rss2.to_hash[:author]
160
+
161
+ author.name.should == "Chuck Norris"
162
+ author.email.should == "da_man@example.com"
163
+ author.uri.should be_nil
164
+ end
165
+ end
166
+
167
+ end
168
+
169
+ describe "Without an author", FeedMe::ItemParser do
170
+
171
+ before :each do
172
+ @atom_feed = FeedMe::FeedParser.open(fixture('welformed.atom'))
173
+ @atom = FeedMe::ItemParser.build(@atom_feed.root_node.search('/entry')[1], :atom, @atom_feed)
174
+ @rss2_feed = FeedMe::FeedParser.open(fixture('welformed.rss2'))
175
+ @rss2 = FeedMe::ItemParser.build(@rss2_feed.root_node.search('/item')[1], :rss2, @rss2_feed)
176
+ end
177
+
178
+ describe '#author.name' do
179
+ it "should be valid for an atom feed" do
180
+ @atom.author.name.should be_nil
181
+ end
182
+
183
+ it "should be valid for an rss2 feed" do
184
+ @rss2.author.name.should be_nil
185
+ end
186
+ end
187
+
188
+ end
@@ -0,0 +1,14 @@
1
+ require File.join( File.dirname(__FILE__), "spec_helper" )
2
+
3
+ require 'feed_me'
4
+
5
+ describe FeedMe::SimpleStruct do
6
+
7
+ it "should append methods" do
8
+ struct = FeedMe::SimpleStruct.new(:foo => "blah", :bar => 23)
9
+
10
+ struct.foo.should == "blah"
11
+ struct.bar.should == 23
12
+ end
13
+
14
+ end
@@ -0,0 +1,25 @@
1
+ $TESTING=true
2
+ $:.push File.join(File.dirname(__FILE__), '..', 'lib')
3
+
4
+ require 'rubygems'
5
+ # require 'ruby-debug'
6
+
7
+ module Fixtures
8
+
9
+ def fixture_path
10
+ File.join(File.dirname(__FILE__), 'fixtures')
11
+ end
12
+
13
+ def fixture(name)
14
+ File.join(fixture_path, name)
15
+ end
16
+
17
+ def hpricot_fixture(name)
18
+ Hpricot.XML(open(fixture(name)).read)
19
+ end
20
+
21
+ end
22
+
23
+ Spec::Runner.configure do |config|
24
+ config.include(Fixtures)
25
+ end
metadata ADDED
@@ -0,0 +1,81 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: feed_me
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.5
5
+ platform: ruby
6
+ authors:
7
+ - Jonas Nicklas
8
+ - Jonathan Stott
9
+ autorequire: feed_me
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2009-11-06 00:00:00 +00:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: hpricot
18
+ type: :runtime
19
+ version_requirement:
20
+ version_requirements: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ">="
23
+ - !ruby/object:Gem::Version
24
+ version: "0"
25
+ version:
26
+ description: Nice and simple RSS and atom feed parsing built on hpricot
27
+ email: jonas.nicklas@gmail.com
28
+ executables: []
29
+
30
+ extensions: []
31
+
32
+ extra_rdoc_files:
33
+ - LICENSE
34
+ - README
35
+ - TODO
36
+ files:
37
+ - LICENSE
38
+ - README
39
+ - Rakefile
40
+ - TODO
41
+ - lib/feed_me.rb
42
+ - lib/feed_me/abstract_parser.rb
43
+ - lib/feed_me/consts.rb
44
+ - lib/feed_me/feed_parser.rb
45
+ - lib/feed_me/feed_struct.rb
46
+ - lib/feed_me/item_parser.rb
47
+ - lib/feed_me/merbtasks.rb
48
+ - lib/feed_me/simple_struct.rb
49
+ has_rdoc: true
50
+ homepage: http://github.com/jnicklas/feed_me
51
+ licenses: []
52
+
53
+ post_install_message:
54
+ rdoc_options:
55
+ - --charset=UTF-8
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: "0"
63
+ version:
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: "0"
69
+ version:
70
+ requirements: []
71
+
72
+ rubyforge_project:
73
+ rubygems_version: 1.3.5
74
+ signing_key:
75
+ specification_version: 3
76
+ summary: Nice and simple RSS and atom feed parsing built on hpricot
77
+ test_files:
78
+ - spec/feed_parser_spec.rb
79
+ - spec/item_parser_spec.rb
80
+ - spec/simple_struct_spec.rb
81
+ - spec/spec_helper.rb