Spectives-feedzirra 0.0.28
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +169 -0
- data/README.textile +205 -0
- data/Rakefile +56 -0
- data/lib/core_ext/date.rb +21 -0
- data/lib/core_ext/string.rb +9 -0
- data/lib/feedzirra/feed.rb +334 -0
- data/lib/feedzirra/feed_entry_utilities.rb +45 -0
- data/lib/feedzirra/feed_utilities.rb +71 -0
- data/lib/feedzirra/parser/atom.rb +35 -0
- data/lib/feedzirra/parser/atom_entry.rb +41 -0
- data/lib/feedzirra/parser/itunes_category.rb +12 -0
- data/lib/feedzirra/parser/mrss_category.rb +11 -0
- data/lib/feedzirra/parser/mrss_content.rb +48 -0
- data/lib/feedzirra/parser/mrss_copyright.rb +10 -0
- data/lib/feedzirra/parser/mrss_credit.rb +11 -0
- data/lib/feedzirra/parser/mrss_group.rb +37 -0
- data/lib/feedzirra/parser/mrss_hash.rb +10 -0
- data/lib/feedzirra/parser/mrss_player.rb +11 -0
- data/lib/feedzirra/parser/mrss_rating.rb +10 -0
- data/lib/feedzirra/parser/mrss_restriction.rb +11 -0
- data/lib/feedzirra/parser/mrss_text.rb +13 -0
- data/lib/feedzirra/parser/mrss_thumbnail.rb +11 -0
- data/lib/feedzirra/parser/rss.rb +83 -0
- data/lib/feedzirra/parser/rss_entry.rb +83 -0
- data/lib/feedzirra/parser/rss_image.rb +15 -0
- data/lib/feedzirra.rb +44 -0
- data/spec/benchmarks/feed_benchmarks.rb +98 -0
- data/spec/benchmarks/feedzirra_benchmarks.rb +40 -0
- data/spec/benchmarks/fetching_benchmarks.rb +28 -0
- data/spec/benchmarks/parsing_benchmark.rb +30 -0
- data/spec/benchmarks/updating_benchmarks.rb +33 -0
- data/spec/feedzirra/feed_entry_utilities_spec.rb +52 -0
- data/spec/feedzirra/feed_spec.rb +546 -0
- data/spec/feedzirra/feed_utilities_spec.rb +149 -0
- data/spec/feedzirra/parser/atom_entry_spec.rb +49 -0
- data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +42 -0
- data/spec/feedzirra/parser/atom_feed_burner_spec.rb +39 -0
- data/spec/feedzirra/parser/atom_spec.rb +43 -0
- data/spec/feedzirra/parser/mrss_content_spec.rb +32 -0
- data/spec/feedzirra/parser/rss_entry_spec.rb +154 -0
- data/spec/feedzirra/parser/rss_spec.rb +93 -0
- data/spec/sample_feeds/run_against_sample.rb +20 -0
- data/spec/spec_helper.rb +62 -0
- metadata +154 -0
@@ -0,0 +1,149 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
describe Feedzirra::FeedUtilities do
|
4
|
+
before(:each) do
|
5
|
+
@klass = Class.new do
|
6
|
+
include Feedzirra::FeedUtilities
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
describe "instance methods" do
|
11
|
+
it "should provide an updated? accessor" do
|
12
|
+
feed = @klass.new
|
13
|
+
feed.should_not be_updated
|
14
|
+
feed.updated = true
|
15
|
+
feed.should be_updated
|
16
|
+
end
|
17
|
+
|
18
|
+
it "should provide a new_entries accessor" do
|
19
|
+
feed = @klass.new
|
20
|
+
feed.new_entries.should == []
|
21
|
+
feed.new_entries = [:foo]
|
22
|
+
feed.new_entries.should == [:foo]
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should provide an etag accessor" do
|
26
|
+
feed = @klass.new
|
27
|
+
feed.etag = "foo"
|
28
|
+
feed.etag.should == "foo"
|
29
|
+
end
|
30
|
+
|
31
|
+
it "should provide a last_modified accessor" do
|
32
|
+
feed = @klass.new
|
33
|
+
time = Time.now
|
34
|
+
feed.last_modified = time
|
35
|
+
feed.last_modified.should == time
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should return new_entries? as true when entries are put into new_entries" do
|
39
|
+
feed = @klass.new
|
40
|
+
feed.new_entries << :foo
|
41
|
+
feed.should have_new_entries
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should return a last_modified value from the entry with the most recent published date if the last_modified date hasn't been set" do
|
45
|
+
feed = Feedzirra::Parser::Atom.new
|
46
|
+
entry =Feedzirra::Parser::AtomEntry.new
|
47
|
+
entry.published = Time.now.to_s
|
48
|
+
feed.entries << entry
|
49
|
+
feed.last_modified.should == entry.published
|
50
|
+
end
|
51
|
+
|
52
|
+
it "should not throw an error if one of the entries has published date of nil" do
|
53
|
+
feed = Feedzirra::Parser::Atom.new
|
54
|
+
entry = Feedzirra::Parser::AtomEntry.new
|
55
|
+
entry.published = Time.now.to_s
|
56
|
+
feed.entries << entry
|
57
|
+
feed.entries << Feedzirra::Parser::AtomEntry.new
|
58
|
+
feed.last_modified.should == entry.published
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
describe "#update_from_feed" do
|
63
|
+
describe "updating feed attributes" do
|
64
|
+
before(:each) do
|
65
|
+
# I'm using the Atom class when I know I should be using a different one. However, this update_from_feed
|
66
|
+
# method would only be called against a feed item.
|
67
|
+
@feed = Feedzirra::Parser::Atom.new
|
68
|
+
@feed.title = "A title"
|
69
|
+
@feed.url = "http://pauldix.net"
|
70
|
+
@feed.feed_url = "http://feeds.feedburner.com/PaulDixExplainsNothing"
|
71
|
+
@feed.updated = false
|
72
|
+
@updated_feed = @feed.dup
|
73
|
+
end
|
74
|
+
|
75
|
+
it "should update the title if changed" do
|
76
|
+
@updated_feed.title = "new title"
|
77
|
+
@feed.update_from_feed(@updated_feed)
|
78
|
+
@feed.title.should == @updated_feed.title
|
79
|
+
@feed.should be_updated
|
80
|
+
end
|
81
|
+
|
82
|
+
it "should not update the title if the same" do
|
83
|
+
@feed.update_from_feed(@updated_feed)
|
84
|
+
@feed.should_not be_updated
|
85
|
+
end
|
86
|
+
|
87
|
+
it "should update the feed_url if changed" do
|
88
|
+
@updated_feed.feed_url = "a new feed url"
|
89
|
+
@feed.update_from_feed(@updated_feed)
|
90
|
+
@feed.feed_url.should == @updated_feed.feed_url
|
91
|
+
@feed.should be_updated
|
92
|
+
end
|
93
|
+
|
94
|
+
it "should not update the feed_url if the same" do
|
95
|
+
@feed.update_from_feed(@updated_feed)
|
96
|
+
@feed.should_not be_updated
|
97
|
+
end
|
98
|
+
|
99
|
+
it "should update the url if changed" do
|
100
|
+
@updated_feed.url = "a new url"
|
101
|
+
@feed.update_from_feed(@updated_feed)
|
102
|
+
@feed.url.should == @updated_feed.url
|
103
|
+
end
|
104
|
+
|
105
|
+
it "should not update the url if not changed" do
|
106
|
+
@feed.update_from_feed(@updated_feed)
|
107
|
+
@feed.should_not be_updated
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
describe "updating entries" do
|
112
|
+
before(:each) do
|
113
|
+
# I'm using the Atom class when I know I should be using a different one. However, this update_from_feed
|
114
|
+
# method would only be called against a feed item.
|
115
|
+
@feed = Feedzirra::Parser::Atom.new
|
116
|
+
@feed.title = "A title"
|
117
|
+
@feed.url = "http://pauldix.net"
|
118
|
+
@feed.feed_url = "http://feeds.feedburner.com/PaulDixExplainsNothing"
|
119
|
+
@feed.updated = false
|
120
|
+
@updated_feed = @feed.dup
|
121
|
+
@old_entry = Feedzirra::Parser::AtomEntry.new
|
122
|
+
@old_entry.url = "http://pauldix.net/old.html"
|
123
|
+
@old_entry.published = Time.now.to_s
|
124
|
+
@new_entry = Feedzirra::Parser::AtomEntry.new
|
125
|
+
@new_entry.url = "http://pauldix.net/new.html"
|
126
|
+
@new_entry.published = (Time.now + 10).to_s
|
127
|
+
@feed.entries << @old_entry
|
128
|
+
@updated_feed.entries << @new_entry
|
129
|
+
@updated_feed.entries << @old_entry
|
130
|
+
end
|
131
|
+
|
132
|
+
it "should update last-modified from the latest entry date" do
|
133
|
+
@feed.update_from_feed(@updated_feed)
|
134
|
+
@feed.last_modified.should == @new_entry.published
|
135
|
+
end
|
136
|
+
|
137
|
+
it "should put new entries into new_entries" do
|
138
|
+
@feed.update_from_feed(@updated_feed)
|
139
|
+
@feed.new_entries.should == [@new_entry]
|
140
|
+
end
|
141
|
+
|
142
|
+
it "should also put new entries into the entries collection" do
|
143
|
+
@feed.update_from_feed(@updated_feed)
|
144
|
+
@feed.entries.should include(@new_entry)
|
145
|
+
@feed.entries.should include(@old_entry)
|
146
|
+
end
|
147
|
+
end
|
148
|
+
end
|
149
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
|
2
|
+
|
3
|
+
describe Feedzirra::Parser::AtomEntry do
|
4
|
+
before(:each) do
|
5
|
+
# I don't really like doing it this way because these unit tests should only rely on AtomEntry,
|
6
|
+
# but this is actually how it should work. You would never just pass entry xml straight to the AtomEntry
|
7
|
+
@entry = Feedzirra::Parser::Atom.parse(sample_atom_feed).entries.first
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should parse the title" do
|
11
|
+
@entry.title.should == "AWS Job: Architect & Designer Position in Turkey"
|
12
|
+
end
|
13
|
+
|
14
|
+
it "should parse the url" do
|
15
|
+
@entry.url.should == "http://aws.typepad.com/aws/2009/01/aws-job-architect-designer-position-in-turkey.html"
|
16
|
+
end
|
17
|
+
|
18
|
+
it "should parse the url even when" do
|
19
|
+
Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).entries.first.url.should == "http://www.innoq.com/blog/phaus/2009/07/ja.html"
|
20
|
+
end
|
21
|
+
|
22
|
+
it "should parse the author" do
|
23
|
+
@entry.author.should == "AWS Editor"
|
24
|
+
end
|
25
|
+
|
26
|
+
it "should parse the content" do
|
27
|
+
@entry.content.should == sample_atom_entry_content
|
28
|
+
end
|
29
|
+
|
30
|
+
it "should provide a summary" do
|
31
|
+
@entry.summary.should == "Late last year an entrepreneur from Turkey visited me at Amazon HQ in Seattle. We talked about his plans to use AWS as part of his new social video portal startup. I won't spill any beans before he's ready to..."
|
32
|
+
end
|
33
|
+
|
34
|
+
it "should parse the published date" do
|
35
|
+
@entry.published.to_s.should == "Fri Jan 16 18:21:00 UTC 2009"
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should parse the categories" do
|
39
|
+
@entry.categories.should == ['Turkey', 'Seattle']
|
40
|
+
end
|
41
|
+
|
42
|
+
it "should parse the updated date" do
|
43
|
+
@entry.updated.to_s.should == "Fri Jan 16 18:21:00 UTC 2009"
|
44
|
+
end
|
45
|
+
|
46
|
+
it "should parse the id" do
|
47
|
+
@entry.id.should == "tag:typepad.com,2003:post-61484736"
|
48
|
+
end
|
49
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
|
2
|
+
|
3
|
+
describe Feedzirra::Parser::AtomFeedBurnerEntry do
|
4
|
+
before(:each) do
|
5
|
+
# I don't really like doing it this way because these unit tests should only rely on AtomEntry,
|
6
|
+
# but this is actually how it should work. You would never just pass entry xml straight to the AtomEntry
|
7
|
+
@entry = Feedzirra::Parser::AtomFeedBurner.parse(sample_feedburner_atom_feed).entries.first
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should parse the title" do
|
11
|
+
@entry.title.should == "Making a Ruby C library even faster"
|
12
|
+
end
|
13
|
+
|
14
|
+
it "should be able to fetch a url via the 'alternate' rel if no origLink exists" do
|
15
|
+
entry = Feedzirra::Parser::AtomFeedBurner.parse(File.read("#{File.dirname(__FILE__)}/../../sample_feeds/PaulDixExplainsNothingAlternate.xml")).entries.first
|
16
|
+
entry.url.should == 'http://feeds.feedburner.com/~r/PaulDixExplainsNothing/~3/519925023/making-a-ruby-c-library-even-faster.html'
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should parse the url" do
|
20
|
+
@entry.url.should == "http://www.pauldix.net/2009/01/making-a-ruby-c-library-even-faster.html"
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should parse the author" do
|
24
|
+
@entry.author.should == "Paul Dix"
|
25
|
+
end
|
26
|
+
|
27
|
+
it "should parse the content" do
|
28
|
+
@entry.content.should == sample_feedburner_atom_entry_content
|
29
|
+
end
|
30
|
+
|
31
|
+
it "should provide a summary" do
|
32
|
+
@entry.summary.should == "Last week I released the first version of a SAX based XML parsing library called SAX-Machine. It uses Nokogiri, which uses libxml, so it's pretty fast. However, I felt that it could be even faster. The only question was how..."
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should parse the published date" do
|
36
|
+
@entry.published.to_s.should == "Thu Jan 22 15:50:22 UTC 2009"
|
37
|
+
end
|
38
|
+
|
39
|
+
it "should parse the categories" do
|
40
|
+
@entry.categories.should == ['Ruby', 'Another Category']
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
|
2
|
+
|
3
|
+
describe Feedzirra::Parser::AtomFeedBurner do
|
4
|
+
describe "#will_parse?" do
|
5
|
+
it "should return true for a feedburner atom feed" do
|
6
|
+
Feedzirra::Parser::AtomFeedBurner.should be_able_to_parse(sample_feedburner_atom_feed)
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should return false for an rdf feed" do
|
10
|
+
Feedzirra::Parser::AtomFeedBurner.should_not be_able_to_parse(sample_rdf_feed)
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should return false for a regular atom feed" do
|
14
|
+
Feedzirra::Parser::AtomFeedBurner.should_not be_able_to_parse(sample_atom_feed)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
describe "parsing" do
|
19
|
+
before(:each) do
|
20
|
+
@feed = Feedzirra::Parser::AtomFeedBurner.parse(sample_feedburner_atom_feed)
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should parse the title" do
|
24
|
+
@feed.title.should == "Paul Dix Explains Nothing"
|
25
|
+
end
|
26
|
+
|
27
|
+
it "should parse the url" do
|
28
|
+
@feed.url.should == "http://www.pauldix.net/"
|
29
|
+
end
|
30
|
+
|
31
|
+
it "should parse the feed_url" do
|
32
|
+
@feed.feed_url.should == "http://feeds.feedburner.com/PaulDixExplainsNothing"
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should parse entries" do
|
36
|
+
@feed.entries.size.should == 5
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
|
2
|
+
|
3
|
+
describe Feedzirra::Parser::Atom do
|
4
|
+
describe "#will_parse?" do
|
5
|
+
it "should return true for an atom feed" do
|
6
|
+
Feedzirra::Parser::Atom.should be_able_to_parse(sample_atom_feed)
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should return false for an rdf feed" do
|
10
|
+
Feedzirra::Parser::Atom.should_not be_able_to_parse(sample_rdf_feed)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
describe "parsing" do
|
15
|
+
before(:each) do
|
16
|
+
@feed = Feedzirra::Parser::Atom.parse(sample_atom_feed)
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should parse the title" do
|
20
|
+
@feed.title.should == "Amazon Web Services Blog"
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should parse the url" do
|
24
|
+
@feed.url.should == "http://aws.typepad.com/aws/"
|
25
|
+
end
|
26
|
+
|
27
|
+
it "should parse the url even when it doesn't have the type='text/html' attribute" do
|
28
|
+
Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).url.should == "http://www.innoq.com/planet/"
|
29
|
+
end
|
30
|
+
|
31
|
+
it "should parse the feed_url even when it doesn't have the type='application/atom+xml' attribute" do
|
32
|
+
Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).feed_url.should == "http://www.innoq.com/planet/atom.xml"
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should parse the feed_url" do
|
36
|
+
@feed.feed_url.should == "http://aws.typepad.com/aws/atom.xml"
|
37
|
+
end
|
38
|
+
|
39
|
+
it "should parse entries" do
|
40
|
+
@feed.entries.size.should == 10
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
|
2
|
+
|
3
|
+
describe Feedzirra::Parser::RSSEntry::MRSSContent do
|
4
|
+
before(:each) do
|
5
|
+
# I don't really like doing it this way because these unit tests should only rely on RSSEntry,
|
6
|
+
# but this is actually how it should work. You would never just pass entry xml straight to the RSSEntry
|
7
|
+
@entries = Feedzirra::Parser::RSS.parse(sample_mrss_feed).entries
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should parse the media" do
|
11
|
+
entry = @entries.first
|
12
|
+
entry.media_content.size.should == 1
|
13
|
+
entry.media_description.should == 'The story began with a July 23 article in a local newspaper, The Independent. Jenna Hewitt, 26, of Montauk, and three friends said they found the ...'
|
14
|
+
entry.media_thumbnail.should == 'http://3.gvt0.com/vi/Y3rNEu4A8WM/default.jpg'
|
15
|
+
entry.media_thumbnail_width.should == '320'
|
16
|
+
entry.media_thumbnail_height.should == '240'
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should handle multiple pieces of content" do
|
20
|
+
media = @entries[1].media_content
|
21
|
+
media.size.should == 2
|
22
|
+
media[0].url.should == 'http://www.youtube.com/v/pvaM6sjLbuA&fs=1'
|
23
|
+
media[0].content_type.should == 'application/x-shockwave-flash'
|
24
|
+
media[0].medium.should == 'video'
|
25
|
+
media[0].duration.should == '575'
|
26
|
+
|
27
|
+
media[1].url.should == 'http://www.youtube.com/v/pvaM6sjLbuA&fs=2'
|
28
|
+
media[1].content_type.should == 'video/mp4'
|
29
|
+
media[1].medium.should == 'video'
|
30
|
+
media[1].duration.should == '576'
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,154 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
|
2
|
+
|
3
|
+
describe Feedzirra::Parser::RSSEntry do
|
4
|
+
before do
|
5
|
+
# I don't really like doing it this way because these unit tests should only rely on RSSEntry,
|
6
|
+
# but this is actually how it should work. You would never just pass entry xml straight to the RSSEntry
|
7
|
+
@entry = Feedzirra::Parser::RSS.parse(sample_rss_feed).entries.first
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should parse the title" do
|
11
|
+
@entry.title.should == "Nokogiri’s Slop Feature"
|
12
|
+
end
|
13
|
+
|
14
|
+
it "should parse the url" do
|
15
|
+
@entry.url.should == "http://tenderlovemaking.com/2008/12/04/nokogiris-slop-feature/"
|
16
|
+
end
|
17
|
+
|
18
|
+
it "should parse the author" do
|
19
|
+
@entry.author.should == "Aaron Patterson"
|
20
|
+
end
|
21
|
+
|
22
|
+
it "should parse the content" do
|
23
|
+
@entry.content.should == sample_rss_entry_content
|
24
|
+
end
|
25
|
+
|
26
|
+
it "should provide a summary" do
|
27
|
+
@entry.summary.should == "Oops! When I released nokogiri version 1.0.7, I totally forgot to talk about Nokogiri::Slop() feature that was added. Why is it called \"slop\"? It lets you sloppily explore documents. Basically, it decorates your document with method_missing() that allows you to search your document via method calls.\nGiven this document:\n\ndoc = Nokogiri::Slop(<<-eohtml)\n<html>\n  <body>\n  [...]"
|
28
|
+
end
|
29
|
+
|
30
|
+
it "should parse the published date" do
|
31
|
+
@entry.published.to_s.should == "Thu Dec 04 17:17:49 UTC 2008"
|
32
|
+
end
|
33
|
+
|
34
|
+
it "should parse the categories" do
|
35
|
+
@entry.categories.should == ['computadora', 'nokogiri', 'rails']
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should parse the guid as id" do
|
39
|
+
@entry.id.should == "http://tenderlovemaking.com/?p=198"
|
40
|
+
end
|
41
|
+
|
42
|
+
describe "parsing an iTunes feed" do
|
43
|
+
before do
|
44
|
+
@item = Feedzirra::Parser::RSS.parse(sample_itunes_feed).entries.first
|
45
|
+
end
|
46
|
+
|
47
|
+
it "should parse the title" do
|
48
|
+
@item.title.should == "Shake Shake Shake Your Spices"
|
49
|
+
end
|
50
|
+
|
51
|
+
it "should parse the author" do
|
52
|
+
@item.author.should == "John Doe"
|
53
|
+
end
|
54
|
+
|
55
|
+
it "should parse the subtitle" do
|
56
|
+
@item.subtitle.should == "A short primer on table spices"
|
57
|
+
end
|
58
|
+
|
59
|
+
it "should parse the summary" do
|
60
|
+
@item.summary.should == "This week we talk about salt and pepper shakers, comparing and contrasting pour rates, construction materials, and overall aesthetics. Come and join the party!"
|
61
|
+
end
|
62
|
+
|
63
|
+
it "should parse the enclosure" do
|
64
|
+
@item.enclosure_length.should == "8727310"
|
65
|
+
@item.enclosure_type.should == "audio/x-m4a"
|
66
|
+
@item.enclosure_url.should == "http://example.com/podcasts/everything/AllAboutEverythingEpisode3.m4a"
|
67
|
+
end
|
68
|
+
|
69
|
+
it "should parse the id" do
|
70
|
+
@item.id.should == "http://example.com/podcasts/archive/aae20050615.m4a"
|
71
|
+
end
|
72
|
+
|
73
|
+
it "should parse the published date" do
|
74
|
+
@item.published.should == Time.parse('Wed Jun 15 19:00:00 UTC 2005')
|
75
|
+
end
|
76
|
+
|
77
|
+
it "should parse the duration" do
|
78
|
+
@item.duration.should == "7:04"
|
79
|
+
end
|
80
|
+
|
81
|
+
it "should parse the keywords" do
|
82
|
+
@item.keywords.should == "salt, pepper, shaker, exciting"
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
describe "parsing Media RSS" do
|
87
|
+
before do
|
88
|
+
@item = Feedzirra::Parser::RSS.parse(sample_mrss_feed).entries.first
|
89
|
+
end
|
90
|
+
|
91
|
+
it "should parse media:rating" do
|
92
|
+
@item.rating.should == 'adult'
|
93
|
+
@item.rating_scheme.should == 'urn:simple'
|
94
|
+
end
|
95
|
+
|
96
|
+
it "should parse media:title" do
|
97
|
+
@item.media_title.should == 'The Montauk Monster-Hells Visits New York!'
|
98
|
+
end
|
99
|
+
|
100
|
+
it "should parse media:description" do
|
101
|
+
@item.media_description.should == 'The story began with a July 23 article in a local newspaper, The Independent. Jenna Hewitt, 26, of Montauk, and three friends said they found the ...'
|
102
|
+
end
|
103
|
+
|
104
|
+
it "should parse media:keywords" do
|
105
|
+
@item.media_keywords.should == 'kitty, cat, big dog, yarn, fluffy'
|
106
|
+
end
|
107
|
+
|
108
|
+
it "should parse media:tumbnail" do
|
109
|
+
@item.media_content.size.should == 1
|
110
|
+
@item.media_description.should == 'The story began with a July 23 article in a local newspaper, The Independent. Jenna Hewitt, 26, of Montauk, and three friends said they found the ...'
|
111
|
+
@item.media_thumbnail.should == 'http://3.gvt0.com/vi/Y3rNEu4A8WM/default.jpg'
|
112
|
+
@item.media_thumbnail_width.should == '320'
|
113
|
+
@item.media_thumbnail_height.should == '240'
|
114
|
+
end
|
115
|
+
|
116
|
+
it "should parse media:category" do
|
117
|
+
@item.media_category.should == 'Arts/Movies/Titles/A/Ace_Ventura_Series/Ace_Ventura_-_Pet_Detective'
|
118
|
+
@item.media_category_scheme.should == 'http://dmoz.org'
|
119
|
+
@item.media_category_label.should == 'Ace Ventura - Pet Detective'
|
120
|
+
end
|
121
|
+
|
122
|
+
it "should parse media:hash" do
|
123
|
+
@item.media_hash.should == 'dfdec888b72151965a34b4b59031290a'
|
124
|
+
@item.media_hash_algo.should == 'md5'
|
125
|
+
end
|
126
|
+
|
127
|
+
it "should parse media:player" do
|
128
|
+
@item.media_player_url.should == 'http://www.example.com/player?id=1111'
|
129
|
+
@item.media_player_width.should == '400'
|
130
|
+
@item.media_player_height.should == '200'
|
131
|
+
end
|
132
|
+
|
133
|
+
it "should parse media:credit" do
|
134
|
+
@item.credits.size.should == 2
|
135
|
+
@item.credits.first.role.should == 'producer'
|
136
|
+
@item.credits.first.scheme.should == 'urn:ebu'
|
137
|
+
pending 'not sure why the name isn\'t getting set'
|
138
|
+
@item.credits.first.name.should == 'John Doe'
|
139
|
+
end
|
140
|
+
|
141
|
+
it "should parse media:copyright" do
|
142
|
+
@item.copyright.should == '2009 Example Co.'
|
143
|
+
@item.copyright_url.should == 'http://example.com/copyright.html'
|
144
|
+
end
|
145
|
+
|
146
|
+
it "should parse media:restriction" do
|
147
|
+
pending 'need to figure out why this is getting String'
|
148
|
+
@item.media_restriction.type.should == 'MRSSRestriction'
|
149
|
+
@item.media_restriction.value.should == 'au us'
|
150
|
+
@item.media_restriction.scope.should == 'country'
|
151
|
+
@item.media_restriction.relationship.should == 'allow'
|
152
|
+
end
|
153
|
+
end
|
154
|
+
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
|
2
|
+
|
3
|
+
describe Feedzirra::Parser::RSS do
|
4
|
+
describe "#will_parse?" do
|
5
|
+
it "should return true for an RSS feed" do
|
6
|
+
Feedzirra::Parser::RSS.should be_able_to_parse(sample_rss_feed)
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should return true for an rdf feed" do
|
10
|
+
Feedzirra::Parser::RSS.should be_able_to_parse(sample_rdf_feed)
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should return true for an iTunes feed" do
|
14
|
+
Feedzirra::Parser::RSS.should be_able_to_parse(sample_itunes_feed)
|
15
|
+
end
|
16
|
+
|
17
|
+
it "should return fase for an atom feed" do
|
18
|
+
Feedzirra::Parser::RSS.should_not be_able_to_parse(sample_atom_feed)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
describe "parsing" do
|
23
|
+
before do
|
24
|
+
@feed = Feedzirra::Parser::RSS.parse(sample_mrss_feed)
|
25
|
+
end
|
26
|
+
|
27
|
+
it "should parse the title" do
|
28
|
+
@feed.title.should == "Google Video - Hot videos"
|
29
|
+
end
|
30
|
+
|
31
|
+
it "should parse the url" do
|
32
|
+
@feed.url.should == "http://video.google.com/"
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should provide an accessor for the feed_url" do
|
36
|
+
@feed.respond_to?(:feed_url).should == true
|
37
|
+
@feed.respond_to?(:feed_url=).should == true
|
38
|
+
end
|
39
|
+
|
40
|
+
it "should parse entries" do
|
41
|
+
@feed.entries.size.should == 20
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should parse the image" do
|
45
|
+
pending 'setting NilClass for some reason'
|
46
|
+
@feed.image.class.should == 'RSSImage'
|
47
|
+
@feed.image.title.should == 'Google Video - Hot videos'
|
48
|
+
@feed.image.link.should == 'http://video.google.com/'
|
49
|
+
@feed.image.url.should == 'http://video.google.com/common/google_logo_small.jpg'
|
50
|
+
@feed.image.width.should == '100'
|
51
|
+
@feed.image.height.should == '37'
|
52
|
+
end
|
53
|
+
|
54
|
+
describe "parsing an iTunes feed" do
|
55
|
+
before do
|
56
|
+
@feed = Feedzirra::Parser::RSS.parse(sample_itunes_feed)
|
57
|
+
end
|
58
|
+
|
59
|
+
it "should parse an image" do
|
60
|
+
@feed.image.should == "http://example.com/podcasts/everything/AllAboutEverything.jpg"
|
61
|
+
end
|
62
|
+
|
63
|
+
it "should parse categories" do
|
64
|
+
# A bare `==` only computes a boolean and discards it; `.should` makes this a real expectation.
# The following lines assert indices 0..2, so the collection is expected to hold three categories.
@feed.categories.size.should == 3
|
65
|
+
@feed.categories[0].should == "Technology"
|
66
|
+
@feed.categories[1].should == "Gadgets"
|
67
|
+
@feed.categories[2].should == "TV & Film"
|
68
|
+
|
69
|
+
# @feed.categories[0].name.should == "Technology"
|
70
|
+
# @feed.categories[0].sub_categories.size.should == 1
|
71
|
+
# @feed.categories[0].sub_categories[0].should == "Gadgets"
|
72
|
+
# @feed.categories[1].name.should == "TV & Film"
|
73
|
+
# @feed.categories[1].sub_categories.size.should == 0
|
74
|
+
end
|
75
|
+
|
76
|
+
it "should parse the summary" do
|
77
|
+
@feed.summary.should == "All About Everything is a show about everything. Each week we dive into any subject known to man and talk about it as much as we can. Look for our Podcast in the iTunes Music Store"
|
78
|
+
end
|
79
|
+
|
80
|
+
it "should parse entries" do
|
81
|
+
@feed.entries.size.should == 4
|
82
|
+
end
|
83
|
+
|
84
|
+
it "should parse the owner name" do
|
85
|
+
@feed.owner_name.should == 'John Doe'
|
86
|
+
end
|
87
|
+
|
88
|
+
it "should parse the owner email" do
|
89
|
+
@feed.owner_email.should == 'john.doe@example.com'
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require File.dirname(__FILE__) + "/../../lib/feedzirra.rb"
|
3
|
+
|
4
|
+
feed_urls = File.readlines(File.dirname(__FILE__) + "/top5kfeeds.dat").collect {|line| line.split.first}
|
5
|
+
|
6
|
+
success = lambda do |url, feed|
|
7
|
+
puts "SUCCESS - #{feed.title} - #{url}"
|
8
|
+
end
|
9
|
+
|
10
|
+
failed_feeds = []
|
11
|
+
failure = lambda do |url, response_code, header, body|
|
12
|
+
failed_feeds << url if response_code == 200
|
13
|
+
puts "*********** FAILED with #{response_code} on #{url}"
|
14
|
+
end
|
15
|
+
|
16
|
+
Feedzirra::Feed.fetch_and_parse(feed_urls, :on_success => success, :on_failure => failure)
|
17
|
+
|
18
|
+
File.open("./failed_urls.txt", "w") do |f|
|
19
|
+
f.write failed_feeds.join("\n")
|
20
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "spec"
|
3
|
+
|
4
|
+
# gem install redgreen for colored test output
|
5
|
+
begin require "redgreen" unless ENV['TM_CURRENT_LINE']; rescue LoadError; end
|
6
|
+
|
7
|
+
path = File.expand_path(File.dirname(__FILE__) + "/../lib/")
|
8
|
+
$LOAD_PATH.unshift(path) unless $LOAD_PATH.include?(path)
|
9
|
+
|
10
|
+
# The lib/ directory was just put at the front of $LOAD_PATH, so require relative to it
# instead of relying on the current working directory being the project root.
require "feedzirra"
|
11
|
+
|
12
|
+
def load_sample(filename)
|
13
|
+
File.read("#{File.dirname(__FILE__)}/sample_feeds/#{filename}")
|
14
|
+
end
|
15
|
+
|
16
|
+
def sample_atom_feed
|
17
|
+
load_sample("AmazonWebServicesBlog.xml")
|
18
|
+
end
|
19
|
+
|
20
|
+
def sample_atom_entry_content
|
21
|
+
load_sample("AmazonWebServicesBlogFirstEntryContent.xml")
|
22
|
+
end
|
23
|
+
|
24
|
+
def sample_itunes_feed
|
25
|
+
load_sample("itunes.xml")
|
26
|
+
end
|
27
|
+
|
28
|
+
def sample_rdf_feed
|
29
|
+
load_sample("HREFConsideredHarmful.xml")
|
30
|
+
end
|
31
|
+
|
32
|
+
def sample_rdf_entry_content
|
33
|
+
load_sample("HREFConsideredHarmfulFirstEntry.xml")
|
34
|
+
end
|
35
|
+
|
36
|
+
def sample_rss_feed_burner_feed
|
37
|
+
load_sample("SamHarrisAuthorPhilosopherEssayistAtheist.xml")
|
38
|
+
end
|
39
|
+
|
40
|
+
def sample_rss_feed
|
41
|
+
load_sample("TenderLovemaking.xml")
|
42
|
+
end
|
43
|
+
|
44
|
+
def sample_rss_entry_content
|
45
|
+
load_sample("TenderLovemakingFirstEntry.xml")
|
46
|
+
end
|
47
|
+
|
48
|
+
def sample_feedburner_atom_feed
|
49
|
+
load_sample("PaulDixExplainsNothing.xml")
|
50
|
+
end
|
51
|
+
|
52
|
+
def sample_feedburner_atom_entry_content
|
53
|
+
load_sample("PaulDixExplainsNothingFirstEntryContent.xml")
|
54
|
+
end
|
55
|
+
|
56
|
+
def sample_wfw_feed
|
57
|
+
load_sample("PaulDixExplainsNothingWFW.xml")
|
58
|
+
end
|
59
|
+
|
60
|
+
def sample_mrss_feed
|
61
|
+
load_sample("GoogleVideoSample.xml")
|
62
|
+
end
|