feed_me 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Jonas Nicklas
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,23 @@
1
+ Feed Me
2
+ =======
3
+
4
+ Feed Me is a simple parser for RSS2 and Atom feeds; adding other feed formats should be trivial. Feed Me is pretty minimal and basically only does translation/cleanup from different feed formats to a consistent API. It is designed to be minimal.
5
+
6
+ Feed Me is built on the excellent Hpricot parser written by _why the lucky stiff.
7
+
8
+ Use it like this:
9
+
10
+ file = File.read('some_feed.atom')
11
+
12
+ feed = FeedMe.parse(file)
13
+
14
+ feed.title
15
+ feed.author.name
16
+ feed.entries.each do |entry|
17
+ entry.title
18
+ entry.content
19
+ end
20
+
21
+ Check out the specs or lib/feed_me/consts.rb for the complete API.
22
+
23
+ DISCLAIMER: This is very much alpha software, use at your own risk!
data/Rakefile ADDED
@@ -0,0 +1,59 @@
1
+ require 'rubygems'
2
+ require 'rake/gempackagetask'
3
+ require 'spec/rake/spectask'
4
+
5
+ file_list = FileList['spec/*_spec.rb']
6
+
7
+ namespace :spec do
8
+ desc "Run all examples with RCov"
9
+ Spec::Rake::SpecTask.new('rcov') do |t|
10
+ t.spec_files = file_list
11
+ t.rcov = true
12
+ t.rcov_dir = "doc/coverage"
13
+ t.rcov_opts = ['--exclude', 'spec']
14
+ end
15
+
16
+ desc "Generate an html report"
17
+ Spec::Rake::SpecTask.new('report') do |t|
18
+ t.spec_files = file_list
19
+ t.rcov_opts = ['--exclude', 'spec']
20
+ t.spec_opts = ["--format", "html:doc/reports/specs.html"]
21
+ t.fail_on_error = false
22
+ end
23
+
24
+ end
25
+
26
+ desc 'Default: run specs.'
27
+ task :default => 'spec:rcov'
28
+
29
+ PLUGIN = "feed_me"
30
+ NAME = "feed_me"
31
+ AUTHOR = ["Jonas Nicklas", "Jonathan Stott"]
32
+ EMAIL = "jonas.nicklas@gmail.com"
33
+ HOMEPAGE = "http://github.com/jnicklas/feed_me"
34
+ SUMMARY = "Nice and simple RSS and atom feed parsing built on hpricot"
35
+
36
+ spec = Gem::Specification.new do |s|
37
+ end
38
+
39
+ begin
40
+ require 'jeweler'
41
+ Jeweler::Tasks.new do |s|
42
+ s.name = NAME
43
+ s.platform = Gem::Platform::RUBY
44
+ s.has_rdoc = true
45
+ s.extra_rdoc_files = ["README", "LICENSE", 'TODO']
46
+ s.summary = SUMMARY
47
+ s.description = s.summary
48
+ s.authors = AUTHOR
49
+ s.email = EMAIL
50
+ s.homepage = HOMEPAGE
51
+ s.require_path = 'lib'
52
+ s.autorequire = PLUGIN
53
+ s.add_dependency('hpricot')
54
+ s.files = %w(LICENSE README Rakefile TODO) + Dir.glob("{lib,specs}/**/*")
55
+ end
56
+ Jeweler::GemcutterTasks.new
57
+ rescue
58
+ puts "Jeweler, or one of its dependencies, is not available. Install it with: sudo gem install namelessjon-jeweler -s http://gems.github.com"
59
+ end
data/TODO ADDED
@@ -0,0 +1,2 @@
1
+ TODO:
2
+ - Add categories to atom feeds
data/lib/feed_me.rb ADDED
@@ -0,0 +1,36 @@
1
+ # make sure we're running inside Merb
2
+ if defined?(Merb::Plugins)
3
+ dependency 'hpricot'
4
+ else
5
+ require 'rubygems'
6
+ require 'hpricot'
7
+ end
8
+ require 'time'
9
+
10
+ unless nil.respond_to? :try # skip the patch when something else already provides #try
11
+ # the ultimate duck: send the message and answer nil instead of raising NoMethodError
12
+ class Object
13
+ def try(method, *args)
14
+ self.send(method, *args) # send also reaches protected and private methods
15
+ rescue NoMethodError # note: also swallows a NoMethodError raised inside the called method
16
+ nil
17
+ end
18
+ end
19
+ end
20
+
21
+ module FeedMe
22
+ class InvalidFeedFormat < StandardError ; end
23
+
24
+ def self.parse(feed)
25
+ FeedMe::FeedParser.parse(feed)
26
+ end
27
+
28
+ def self.open(file)
29
+ FeedMe::FeedParser.parse(file)
30
+ end
31
+ end
32
+
33
+ ['consts', 'abstract_parser', 'feed_struct', 'simple_struct',
34
+ 'feed_parser', 'item_parser'].each do |f|
35
+ require File.join(File.dirname(__FILE__), 'feed_me', f)
36
+ end
@@ -0,0 +1,119 @@
1
+ class FeedMe::AbstractParser # base parser: turns a property table into reader methods on each instance
2
+
3
+ class << self
4
+
5
+ attr_accessor :properties, :root_nodes # class-level settings; properties is indexed by format in #initialize
6
+
7
+ def build(xml, format, *args) # factory: prefers FeedMe::<Format><ClassName> when it exists and descends from self
8
+ # in a world with activesupport this would have been written as
9
+ # format_parser = (format.to_s.camelize + self.to_s).constantize
10
+ camelized_format = format.to_s.split('_').map{ |w| w.capitalize }.join('')
11
+ bare_class = self.to_s.split('::').last
12
+
13
+ begin
14
+ format_parser = FeedMe.const_get(camelized_format + bare_class)
15
+ rescue NameError # no format-specific class defined; format_parser stays nil
16
+ end
17
+
18
+ if format_parser.is_a?(Class) and format_parser.ancestors.include?(self)
19
+ return format_parser.new(xml, format, *args)
20
+ else
21
+ return self.new(xml, format, *args) # fall back to the class build was called on
22
+ end
23
+
24
+ end
25
+
26
+ end
27
+
28
+ def initialize(xml, format) # stores the node and format, then defines the property readers
29
+ self.xml = xml
30
+ self.format = format
31
+ self.properties = self.class.properties[self.format]
32
+
33
+ append_methods
34
+ end
35
+
36
+ def to_hash # maps every property name to the value its reader returns
37
+ hash = {}
38
+ self.properties.each do |method, p|
39
+ hash[method] = self.send(method)
40
+ end
41
+ return hash
42
+ end
43
+
44
+ attr_accessor :xml, :format, :properties
45
+
46
+ alias_method :root_node, :xml
47
+
48
+ protected
49
+
50
+ def fetch_rss_person(selector) # splits "email (name)" text into a SimpleStruct; uri is always nil
51
+ item = fetch(selector)
52
+ if(item)
53
+ email, name = item.split(/\s+/, 2)
54
+ name = name.match( /\((.*?)\)/ ).to_a[1] if name # strip parentheses
55
+ else
56
+ name, email = nil # both end up nil
57
+ end
58
+ FeedMe::SimpleStruct.new(:email => email, :name => name, :uri => nil)
59
+ end
60
+
61
+ def append_methods # adds a singleton reader for each property the object does not already respond to
62
+ self.properties.each do |method, p|
63
+ unless respond_to?(method)
64
+ block = get_proc_for_property(method, p)
65
+ # define the reader on this object's singleton class only
66
+ (class << self; self; end).module_eval do
67
+ define_method method, &block
68
+ end
69
+ end
70
+ end
71
+ end
72
+
73
+ def get_proc_for_property(method, p) # Array => [selector, extractor]; Hash => nested FeedStruct; :undefined => nil; otherwise a plain selector
74
+ if p.class == Array
75
+ return caching_proc(method, &proc { fetch("/#{p[0]}", root_node, p[1].to_sym) })
76
+ elsif p.class == Hash
77
+ return caching_proc(method, &proc { FeedMe::FeedStruct.new(root_node, p) })
78
+ elsif p != :undefined
79
+ return caching_proc(method, &proc { fetch("/#{p}", root_node) })
80
+ else
81
+ return proc { nil }
82
+ end
83
+ end
84
+
85
+ def caching_proc(name, &block) # wraps the block so its result is stored in the instance variable named after the property
86
+ proc do
87
+ ivar = instance_variable_get("@#{name}")
88
+ unless ivar # nil/false results are not remembered and get recomputed on the next call
89
+ result = yield
90
+ instance_variable_set("@#{name}", result)
91
+ return result
92
+ end
93
+ ivar
94
+ end
95
+ end
96
+
97
+ def fetch(selector, search_in = xml, method = :inner_html) # applies extract_<method> to the first match; :array maps inner_html over every match
98
+ item = search_in.search(selector)
99
+
100
+ unless method == :array
101
+ self.try("extract_" + method.to_s, item.first) unless item.empty? # nil when nothing matched
102
+ else
103
+ item.map { |i| self.try("extract_inner_html", i) }
104
+ end
105
+ end
106
+
107
+ def extract_inner_html(item)
108
+ item.inner_html
109
+ end
110
+
111
+ def extract_href(item)
112
+ item[:href]
113
+ end
114
+
115
+ def extract_time(item) # parsed with Time.parse and converted to UTC
116
+ Time.parse(item.inner_html).utc
117
+ end
118
+
119
+ end
@@ -0,0 +1,69 @@
1
+ module FeedMe
2
+
3
+ ROOT_NODES = { # format => selector of that format's root node; FeedParser.identify tries each one
4
+ :atom => "//feed[@xmlns='http://www.w3.org/2005/Atom']",
5
+ :rss2 => "//rss[@version=2.0]/channel"
6
+ }
7
+
8
+ FEED_PROPERTIES = { # per format: reader name => how AbstractParser#get_proc_for_property resolves it
9
+ :atom => {
10
+ :title => :title, # plain value: selector whose inner_html is returned
11
+ :updated_at => [:updated, :time], # pair: [selector, extractor], here extract_time
12
+ :feed_id => :id,
13
+ :url => ["link[@rel=alternate]", :href], # extract_href reads the href attribute
14
+ :href => ["link[@rel=self]", :href],
15
+ :description => :subtitle,
16
+ :generator => :generator,
17
+ :author => { # Hash: becomes a nested FeedStruct with these readers
18
+ :email => 'author/email',
19
+ :name => 'author/name',
20
+ :uri => 'author/uri'
21
+ },
22
+ :entries => :special # :special readers are written by hand in the format-specific parser class
23
+ },
24
+ :rss2 => {
25
+ :title => :title,
26
+ :updated_at => [:lastBuildDate, :time],
27
+ :feed_id => :undefined, # :undefined readers always return nil
28
+ :url => :link,
29
+ :href => :undefined,
30
+ :description => :description,
31
+ :generator => :generator,
32
+ :author => :special,
33
+ :entries => :special
34
+ }
35
+ }
36
+
37
+ ITEM_PROPERTIES = { # same conventions as FEED_PROPERTIES, used by ItemParser for single entries/items
38
+ :atom => {
39
+ :title => :title,
40
+ :updated_at => [:updated, :time],
41
+ :item_id => :id,
42
+ :url => ["link[@rel=alternate]", :href],
43
+ :content => :content,
44
+ :author => {
45
+ :email => 'author/email',
46
+ :name => 'author/name',
47
+ :uri => 'author/uri'
48
+ }
49
+ },
50
+ :rss2 => {
51
+ :title => :title,
52
+ :updated_at => [:pubDate, :time],
53
+ :item_id => :guid,
54
+ :url => :link,
55
+ :content => :description,
56
+ :author => :special,
57
+ :categories => [:category, :array] # :array collects the inner_html of every matching node
58
+ }
59
+ }
60
+
61
+ AUTHOR_PROPERTIES = { # NOTE(review): not referenced by any of the library files shown alongside this one
62
+ :atom => {
63
+ :name => :name,
64
+ :uri => :uri,
65
+ :email => :email
66
+ }
67
+ }
68
+
69
+ end
@@ -0,0 +1,59 @@
1
+ module FeedMe
2
+
3
+ class FeedParser < AbstractParser
4
+
5
+ self.properties = FEED_PROPERTIES
6
+
7
+ class << self
8
+
9
+ def open(file)
10
+ self.parse(Kernel.open(file).read)
11
+ end
12
+
13
+ # parses the passed feed and identifeis what kind of feed it is
14
+ # then returns a parser object
15
+ def parse(feed)
16
+ xml = Hpricot.XML(feed)
17
+
18
+ root_node, format = self.identify(xml)
19
+ raise InvalidFeedFormat if format.nil?
20
+
21
+ self.build(root_node, format)
22
+ end
23
+
24
+ protected
25
+
26
+ def identify(xml)
27
+ FeedMe::ROOT_NODES.each do |f, s|
28
+ item = xml.at(s)
29
+ return item, f if item
30
+ end
31
+ end
32
+
33
+ end
34
+ end
35
+
36
+ class AtomFeedParser < FeedParser
37
+ self.properties = FEED_PROPERTIES
38
+
39
+ def entries
40
+ xml.search('entry').map do |el|
41
+ ItemParser.build(el, self.format, self)
42
+ end
43
+ end
44
+ end
45
+
46
+ class Rss2FeedParser < FeedParser
47
+ self.properties = FEED_PROPERTIES
48
+
49
+ def entries
50
+ xml.search('item').map do |el|
51
+ ItemParser.build(el, self.format, self)
52
+ end
53
+ end
54
+
55
+ def author
56
+ fetch_rss_person("managingEditor")
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,13 @@
1
+ module FeedMe
2
+
3
+ class FeedStruct < AbstractParser # nested value whose readers are resolved against the node it is given
4
+
5
+ def initialize(xml, properties) # takes the property table directly instead of looking it up by format
6
+ self.xml = xml
7
+ self.properties = properties
8
+ append_methods
9
+ end
10
+
11
+ end
12
+
13
+ end
@@ -0,0 +1,25 @@
1
+ module FeedMe
2
+
3
+ class ItemParser < AbstractParser # parser for a single entry/item node of a feed
4
+
5
+ self.properties = ITEM_PROPERTIES
6
+
7
+ attr_accessor :feed # the feed parser this item was built from
8
+
9
+ def initialize(xml, format, feed)
10
+ super(xml, format)
11
+ self.feed = feed
12
+ end
13
+
14
+ end
15
+
16
+ class Rss2ItemParser < ItemParser
17
+
18
+ self.properties = ITEM_PROPERTIES # properties is a per-class setting, so the subclass assigns it again
19
+
20
+ def author # read from the item's author element via fetch_rss_person
21
+ fetch_rss_person("author")
22
+ end
23
+
24
+ end
25
+ end
@@ -0,0 +1,6 @@
1
+ namespace :feed_me do
2
+ desc "Do something for feed_me"
3
+ task :default do # placeholder task: it only prints the message below
4
+ puts "feed_me doesn't do anything"
5
+ end
6
+ end
@@ -0,0 +1,15 @@
1
+ module FeedMe
2
+
3
+ class SimpleStruct # read-only value object: each hash key becomes a reader returning its value
4
+
5
+ def initialize(hash = {})
6
+ (class << self; self; end).module_eval do # readers go on this instance's singleton class only
7
+ hash.each do |method, result|
8
+ define_method( method ) { result }
9
+ end
10
+ end
11
+ end
12
+
13
+ end
14
+
15
+ end
@@ -0,0 +1,295 @@
1
+ require File.join( File.dirname(__FILE__), "spec_helper" )
2
+
3
+ require 'feed_me'
4
+
5
+ describe "all parsing methods", :shared => true do
6
+ it "should identify an atom feed" do
7
+ @atom.should be_an_instance_of(FeedMe::AtomFeedParser)
8
+ @atom.format.should == :atom
9
+ @atom.root_node.xpath == "//feed[@xmlns='http://www.w3.org/2005/Atom']" # NOTE(review): no .should, so this comparison is evaluated and discarded; confirm the intended expectation
10
+ end
11
+
12
+ it "should identify an rss2 feed" do
13
+ @rss2.should be_an_instance_of(FeedMe::Rss2FeedParser)
14
+ @rss2.format.should == :rss2
15
+ @rss2.root_node.xpath == "//rss[@version=2.0]/channel" # NOTE(review): no .should, so this comparison is evaluated and discarded; confirm the intended expectation
16
+ end
17
+ end
18
+
19
+ describe FeedMe::FeedParser do
20
+
21
+ before :each do
22
+ @atom_feed = hpricot_fixture('welformed.atom') / "//feed[@xmlns='http://www.w3.org/2005/Atom']"
23
+ @atom = FeedMe::FeedParser.build(@atom_feed, :atom)
24
+ @rss2_feed = hpricot_fixture('welformed.rss2') / "//rss[@version=2.0]/channel"
25
+ @rss2 = FeedMe::FeedParser.build(@rss2_feed, :rss2)
26
+ end
27
+
28
+ it "should be an atom parser for an atom feed" do
29
+ @atom.should be_an_instance_of(FeedMe::AtomFeedParser)
30
+ end
31
+
32
+ describe ".parse" do
33
+ before(:each) do
34
+ @atom = FeedMe::FeedParser.parse(open(fixture('welformed.atom')).read)
35
+ @rss2 = FeedMe::FeedParser.parse(open(fixture('welformed.rss2')).read)
36
+ end
37
+
38
+ it_should_behave_like "all parsing methods"
39
+
40
+ describe "with bad input" do
41
+ it "should raise on an empty body" do
42
+ lambda { FeedMe::FeedParser.parse("") }.should raise_error(FeedMe::InvalidFeedFormat)
43
+ end
44
+
45
+ it "should raise on a body with non-recognised xml" do
46
+ lambda {
47
+ FeedMe::FeedParser.parse(%Q|<?xml version="1.0" encoding="UTF-8"?>"<foo>bar</foo>|)
48
+ }.should raise_error(FeedMe::InvalidFeedFormat)
49
+ end
50
+ end
51
+
52
+ end
53
+
54
+ describe ".open" do
55
+ before(:each) do
56
+ @atom = FeedMe::FeedParser.open(fixture('welformed.atom'))
57
+ @rss2 = FeedMe::FeedParser.open(fixture('welformed.rss2'))
58
+ end
59
+
60
+ it_should_behave_like "all parsing methods"
61
+ end
62
+
63
+ describe '#title' do
64
+ it "should be valid for an atom feed" do
65
+ @atom.title.should == "Test feed"
66
+ end
67
+
68
+ it "should be valid for an rss2 feed" do
69
+ @rss2.title.should == "Lift Off News"
70
+ end
71
+ end
72
+
73
+ describe '#description' do
74
+ it "should be valid for an atom feed" do
75
+ @atom.description.should == "Monkey test feed"
76
+ end
77
+
78
+ it "should be valid for an rss2 feed" do
79
+ @rss2.description.should == "Liftoff to Space Exploration."
80
+ end
81
+ end
82
+
83
+ describe '#feed_id' do
84
+ it "should be valid for an atom feed" do
85
+ @atom.feed_id.should == "tag:imaginary.host:nyheter"
86
+ end
87
+
88
+ it "should be nil for an rss2 feed" do
89
+ @rss2.feed_id.should be_nil
90
+ end
91
+ end
92
+
93
+ describe '#updated_at' do
94
+ it "should be valid for an atom feed" do
95
+ @atom.updated_at.should == Time.utc(2008, 3, 7, 20, 41, 10)
96
+ end
97
+
98
+ it "should be valid for an rss2 feed" do
99
+ @rss2.updated_at.should == Time.utc(2003, 6, 10, 9, 41, 1)
100
+ end
101
+ end
102
+
103
+ describe '#href' do
104
+ it "should be valid for an atom feed" do
105
+ @atom.href.should == "http://imaginary.host/posts.atom"
106
+ end
107
+
108
+ it "should be nil for an atom feed" do
109
+ @rss2.href.should be_nil
110
+ end
111
+ end
112
+
113
+ describe '#url' do
114
+ it "should be valid for an atom feed" do
115
+ @atom.url.should == "http://imaginary.host/posts"
116
+ end
117
+
118
+ it "should be valid for an rss2 feed" do
119
+ @rss2.url.should == "http://liftoff.msfc.nasa.gov/"
120
+ end
121
+ end
122
+
123
+ describe '#generator' do
124
+ it "should be valid for an atom feed" do
125
+ @atom.generator.should == "Roll your own"
126
+ end
127
+
128
+ it "should be valid for an rss2 feed" do
129
+ @rss2.generator.should == "Weblog Editor 2.0"
130
+ end
131
+ end
132
+
133
+ describe '#format' do
134
+ it "should be :atom for an atom feed" do
135
+ @atom.format.should == :atom
136
+ end
137
+
138
+ it "should be :rss2 for an rss2 feed" do
139
+ @rss2.format.should == :rss2
140
+ end
141
+ end
142
+
143
+ describe '#author.name' do
144
+ it "should be valid for an atom feed" do
145
+ @atom.author.name.should == "Frank"
146
+ end
147
+
148
+ it "should be valid for an rss2 feed" do
149
+ @rss2.author.name.should == "Mary Jo"
150
+ end
151
+ end
152
+
153
+ describe '#author.email' do
154
+ it "should be valid for an atom feed" do
155
+ @atom.author.email.should == "frank@imaginary.host"
156
+ end
157
+
158
+ it "should be valid for an rss2 feed" do
159
+ @rss2.author.email.should == "editor@example.com"
160
+ end
161
+ end
162
+
163
+ describe '#author.uri' do
164
+ it "should be valid for an atom feed" do
165
+ @atom.author.uri.should == "http://imaginary.host/students/frank"
166
+ end
167
+
168
+ it "should be nil for an rss2 feed" do
169
+ @rss2.author.uri.should be_nil
170
+ end
171
+ end
172
+
173
+ describe '#entries' do
174
+ it "should return an array of entries for an atom feed" do
175
+ @atom.entries.should be_an_instance_of(Array)
176
+ end
177
+
178
+ it "should have the correct length for an atom feed" do
179
+ @atom.should have(3).entries
180
+ end
181
+
182
+ it "should return items that are properly parsed for an atom feed" do
183
+ @atom.entries.first.title.should == "First title"
184
+ @atom.entries.first.url.should == "http://imaginary.host/posts/3"
185
+ end
186
+
187
+ it "should return an array of entries for an rss2 feed" do
188
+ @rss2.entries.should be_an_instance_of(Array)
189
+ end
190
+
191
+ it "should have the correct length for an rss2 feed" do
192
+ @rss2.should have(4).entries
193
+ end
194
+
195
+ it "should return items that are properly parsed for an rss2 feed" do
196
+ @rss2.entries.first.title.should == "Star City"
197
+ @rss2.entries.first.url.should == "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
198
+ @rss2.entries.first.item_id.should == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
199
+ end
200
+
201
+ it "should allow items to be read more than once" do
202
+ item = @rss2.entries.first
203
+ item.item_id.should == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
204
+ item.item_id.should == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
205
+ end
206
+ end
207
+
208
+ describe '#to_hash' do
209
+ it "should serialize the title of an atom feed" do
210
+ @atom.to_hash[:title].should == "Test feed"
211
+ end
212
+
213
+ it "should serialize the description of an atom feed" do
214
+ @atom.to_hash[:description].should == "Monkey test feed"
215
+ end
216
+
217
+ it "should serialize the feed_id of an atom feed" do
218
+ @atom.to_hash[:feed_id].should == "tag:imaginary.host:nyheter"
219
+ end
220
+
221
+ it "should serialize the updated_at time of an atom feed" do
222
+ @atom.to_hash[:updated_at].should == Time.utc(2008, 3, 7, 20, 41, 10)
223
+ end
224
+
225
+ it "should serialize the href of an atom feed" do
226
+ @atom.to_hash[:href].should == "http://imaginary.host/posts.atom"
227
+ end
228
+
229
+ it "should serialize the url of an atom feed" do
230
+ @atom.to_hash[:url].should == "http://imaginary.host/posts"
231
+ end
232
+
233
+ it "should serialize the generator of an atom feed" do
234
+ @atom.to_hash[:generator].should == "Roll your own"
235
+ end
236
+
237
+ it "should serialize the entries of an atom feed" do
238
+ @atom.to_hash[:entries].should be_an_instance_of(Array)
239
+ @atom.to_hash[:entries].first.title.should == "First title"
240
+ @atom.to_hash[:entries].first.url.should == "http://imaginary.host/posts/3"
241
+ end
242
+
243
+ it "should serialize the author of an atom feed" do
244
+ author = @atom.to_hash[:author]
245
+
246
+ author.name.should == "Frank"
247
+ author.email.should == "frank@imaginary.host"
248
+ author.uri.should == "http://imaginary.host/students/frank"
249
+ end
250
+
251
+ it "should serialize the title of an rss2 feed" do
252
+ @rss2.to_hash[:title].should == "Lift Off News"
253
+ end
254
+
255
+ it "should serialize the description of an rss2 feed" do
256
+ @rss2.to_hash[:description].should == "Liftoff to Space Exploration."
257
+ end
258
+
259
+ it "should serialize the feed_id of an rss2 feed" do
260
+ @rss2.to_hash[:feed_id].should be_nil
261
+ end
262
+
263
+ it "should serialize the updated_at time of an rss2 feed" do
264
+ @rss2.to_hash[:updated_at].should == Time.utc(2003, 6, 10, 9, 41, 1)
265
+ end
266
+
267
+ it "should serialize the href of an rss2 feed" do
268
+ @rss2.to_hash[:href].should be_nil
269
+ end
270
+
271
+ it "should serialize the url of an rss2 feed" do
272
+ @rss2.to_hash[:url].should == "http://liftoff.msfc.nasa.gov/"
273
+ end
274
+
275
+ it "should serialize the generator of an rss2 feed" do
276
+ @rss2.to_hash[:generator].should == "Weblog Editor 2.0"
277
+ end
278
+
279
+ it "should serialize the entries of an rss2 feed" do
280
+ @rss2.to_hash[:entries].should be_an_instance_of(Array)
281
+ @rss2.to_hash[:entries].first.title.should == "Star City"
282
+ @rss2.to_hash[:entries].first.url.should == "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
283
+ end
284
+
285
+ it "should serialize the author of an rss2 feed" do
286
+
287
+ author = @rss2.to_hash[:author]
288
+
289
+ author.name.should == "Mary Jo"
290
+ author.email.should == "editor@example.com"
291
+ author.uri.should be_nil
292
+ end
293
+ end
294
+
295
+ end
@@ -0,0 +1,188 @@
1
+ require File.join( File.dirname(__FILE__), "spec_helper" )
2
+
3
+ require 'feed_me'
4
+
5
+ describe FeedMe::ItemParser do
6
+
7
+ before :each do
8
+ @atom_feed = FeedMe::FeedParser.open(fixture('welformed.atom'))
9
+ @atom = FeedMe::ItemParser.build(@atom_feed.root_node.search('/entry').first, :atom, @atom_feed)
10
+ @rss2_feed = FeedMe::FeedParser.open(fixture('welformed.rss2'))
11
+ @rss2 = FeedMe::ItemParser.build(@rss2_feed.root_node.search('/item').first, :rss2, @rss2_feed)
12
+ end
13
+
14
+ describe '#to_hash' do
15
+ it "should serialize the parsed properties to a hash" do
16
+
17
+ end
18
+ end
19
+
20
+ describe '#title' do
21
+ it "should be valid for an atom feed" do
22
+ @atom.title.should == "First title"
23
+ end
24
+
25
+ it "should be valid for an rss2 feed" do
26
+ @rss2.title.should == "Star City"
27
+ end
28
+ end
29
+
30
+ describe '#content' do
31
+ it "should be valid for an atom feed" do
32
+ @atom.content.should == "Here be content"
33
+ end
34
+
35
+ it "should be valid for an rss2 feed" do
36
+ @rss2.content.should == "This is content"
37
+ end
38
+ end
39
+
40
+ describe '#item_id' do
41
+ it "should be valid for an atom feed" do
42
+ @atom.item_id.should == "tag:imaginary.host,2008-03-07:nyheter/3"
43
+ end
44
+
45
+ it "should be valid for an rss2 feed" do
46
+ @rss2.item_id.should == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
47
+ end
48
+ end
49
+
50
+ describe '#updated_at' do
51
+ it "should be valid for an atom feed" do
52
+ @atom.updated_at.should == Time.utc( 2008, 3, 7, 20, 41, 10 )
53
+ end
54
+
55
+ it "should be valid for an rss2 feed" do
56
+ @rss2.updated_at.should == Time.utc(2003, 6, 3, 9, 39, 21)
57
+ end
58
+ end
59
+
60
+ describe '#url' do
61
+ it "should be valid for an atom feed" do
62
+ @atom.url.should == "http://imaginary.host/posts/3"
63
+ end
64
+
65
+ it "should be valid for an rss2 feed" do
66
+ @rss2.url.should == "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
67
+ end
68
+ end
69
+
70
+ describe '#format' do
71
+ it "should be :atom for an atom feed" do
72
+ @atom.format.should == :atom
73
+ end
74
+
75
+ it "should be :rss2 for an rss2 feed" do
76
+ @rss2.format.should == :rss2
77
+ end
78
+ end
79
+
80
+ describe '#categories' do
81
+ it "should be correct for an rss2 feed" do
82
+ @rss2.categories.should == ['news', 'chuck']
83
+ end
84
+ end
85
+
86
+ describe '#author.name' do
87
+ it "should be valid for an atom feed" do
88
+ @atom.author.name.should == "Jonas Nicklas"
89
+ end
90
+
91
+ it "should be valid for an rss2 feed" do
92
+ @rss2.author.name.should == "Chuck Norris"
93
+ end
94
+ end
95
+
96
+ describe '#author.email' do
97
+ it "should be valid for an atom feed" do
98
+ @atom.author.email.should == "jonas.nicklas@imaginary.host"
99
+ end
100
+
101
+ it "should be valid for an rss2 feed" do
102
+ @rss2.author.email.should == "da_man@example.com"
103
+ end
104
+ end
105
+
106
+ describe '#author.uri' do
107
+ it "should be valid for an atom feed" do
108
+ @atom.author.uri.should == "http://imaginary.host/students/jnicklas"
109
+ end
110
+
111
+ it "should be nil for an rss2 feed" do
112
+ @rss2.author.uri.should be_nil
113
+ end
114
+ end
115
+
116
+ describe '#to_hash' do
117
+
118
+ it "should serialize the title for an atom feed" do
119
+ @atom.to_hash[:title].should == "First title"
120
+ end
121
+
122
+ it "should serialize the item_id for an atom feed" do
123
+ @atom.to_hash[:item_id].should == "tag:imaginary.host,2008-03-07:nyheter/3"
124
+ end
125
+
126
+ it "should serialize updated_at for an atom feed" do
127
+ @atom.to_hash[:updated_at].should == Time.utc( 2008, 3, 7, 20, 41, 10 )
128
+ end
129
+
130
+ it "should serialize the url for an atom feed" do
131
+ @atom.to_hash[:url].should == "http://imaginary.host/posts/3"
132
+ end
133
+
134
+ it "should serialize the author of an atom feed" do
135
+ author = @atom.to_hash[:author]
136
+
137
+ author.name.should == "Jonas Nicklas"
138
+ author.email.should == "jonas.nicklas@imaginary.host"
139
+ author.uri.should == "http://imaginary.host/students/jnicklas"
140
+ end
141
+
142
+ it "should serialize the title for an rss2 feed" do
143
+ @rss2.to_hash[:title].should == "Star City"
144
+ end
145
+
146
+ it "should serialize the item_id for an rss2 feed" do
147
+ @rss2.to_hash[:item_id].should == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
148
+ end
149
+
150
+ it "should serialize updated_at for an rss2 feed" do
151
+ @rss2.to_hash[:updated_at].should == Time.utc(2003, 6, 3, 9, 39, 21)
152
+ end
153
+
154
+ it "should serialize the url for an rss2 feed" do
155
+ @rss2.to_hash[:url].should == "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
156
+ end
157
+
158
+ it "should serialize the author of an rss2 feed" do
159
+ author = @rss2.to_hash[:author]
160
+
161
+ author.name.should == "Chuck Norris"
162
+ author.email.should == "da_man@example.com"
163
+ author.uri.should be_nil
164
+ end
165
+ end
166
+
167
+ end
168
+
169
+ describe "Without an author", FeedMe::ItemParser do
170
+
171
+ before :each do
172
+ @atom_feed = FeedMe::FeedParser.open(fixture('welformed.atom'))
173
+ @atom = FeedMe::ItemParser.build(@atom_feed.root_node.search('/entry')[1], :atom, @atom_feed)
174
+ @rss2_feed = FeedMe::FeedParser.open(fixture('welformed.rss2'))
175
+ @rss2 = FeedMe::ItemParser.build(@rss2_feed.root_node.search('/item')[1], :rss2, @rss2_feed)
176
+ end
177
+
178
+ describe '#author.name' do
179
+ it "should be valid for an atom feed" do
180
+ @atom.author.name.should be_nil
181
+ end
182
+
183
+ it "should be valid for an rss2 feed" do
184
+ @rss2.author.name.should be_nil
185
+ end
186
+ end
187
+
188
+ end
@@ -0,0 +1,14 @@
1
+ require File.join( File.dirname(__FILE__), "spec_helper" )
2
+
3
+ require 'feed_me'
4
+
5
+ describe FeedMe::SimpleStruct do
6
+
7
+ it "should append methods" do
8
+ struct = FeedMe::SimpleStruct.new(:foo => "blah", :bar => 23)
9
+
10
+ struct.foo.should == "blah"
11
+ struct.bar.should == 23
12
+ end
13
+
14
+ end
@@ -0,0 +1,25 @@
1
+ $TESTING=true
2
+ $:.push File.join(File.dirname(__FILE__), '..', 'lib')
3
+
4
+ require 'rubygems'
5
+ # require 'ruby-debug'
6
+
7
+ module Fixtures
8
+
9
+ def fixture_path
10
+ File.join(File.dirname(__FILE__), 'fixtures')
11
+ end
12
+
13
+ def fixture(name)
14
+ File.join(fixture_path, name)
15
+ end
16
+
17
+ def hpricot_fixture(name)
18
+ Hpricot.XML(open(fixture(name)).read)
19
+ end
20
+
21
+ end
22
+
23
+ Spec::Runner.configure do |config|
24
+ config.include(Fixtures)
25
+ end
metadata ADDED
@@ -0,0 +1,81 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: feed_me
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.5
5
+ platform: ruby
6
+ authors:
7
+ - Jonas Nicklas
8
+ - Jonathan Stott
9
+ autorequire: feed_me
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2009-11-06 00:00:00 +00:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: hpricot
18
+ type: :runtime
19
+ version_requirement:
20
+ version_requirements: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ">="
23
+ - !ruby/object:Gem::Version
24
+ version: "0"
25
+ version:
26
+ description: Nice and simple RSS and atom feed parsing built on hpricot
27
+ email: jonas.nicklas@gmail.com
28
+ executables: []
29
+
30
+ extensions: []
31
+
32
+ extra_rdoc_files:
33
+ - LICENSE
34
+ - README
35
+ - TODO
36
+ files:
37
+ - LICENSE
38
+ - README
39
+ - Rakefile
40
+ - TODO
41
+ - lib/feed_me.rb
42
+ - lib/feed_me/abstract_parser.rb
43
+ - lib/feed_me/consts.rb
44
+ - lib/feed_me/feed_parser.rb
45
+ - lib/feed_me/feed_struct.rb
46
+ - lib/feed_me/item_parser.rb
47
+ - lib/feed_me/merbtasks.rb
48
+ - lib/feed_me/simple_struct.rb
49
+ has_rdoc: true
50
+ homepage: http://github.com/jnicklas/feed_me
51
+ licenses: []
52
+
53
+ post_install_message:
54
+ rdoc_options:
55
+ - --charset=UTF-8
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: "0"
63
+ version:
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: "0"
69
+ version:
70
+ requirements: []
71
+
72
+ rubyforge_project:
73
+ rubygems_version: 1.3.5
74
+ signing_key:
75
+ specification_version: 3
76
+ summary: Nice and simple RSS and atom feed parsing built on hpricot
77
+ test_files:
78
+ - spec/feed_parser_spec.rb
79
+ - spec/item_parser_spec.rb
80
+ - spec/simple_struct_spec.rb
81
+ - spec/spec_helper.rb