Spectives-feedzirra 0.0.28

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. data/README.rdoc +169 -0
  2. data/README.textile +205 -0
  3. data/Rakefile +56 -0
  4. data/lib/core_ext/date.rb +21 -0
  5. data/lib/core_ext/string.rb +9 -0
  6. data/lib/feedzirra/feed.rb +334 -0
  7. data/lib/feedzirra/feed_entry_utilities.rb +45 -0
  8. data/lib/feedzirra/feed_utilities.rb +71 -0
  9. data/lib/feedzirra/parser/atom.rb +35 -0
  10. data/lib/feedzirra/parser/atom_entry.rb +41 -0
  11. data/lib/feedzirra/parser/itunes_category.rb +12 -0
  12. data/lib/feedzirra/parser/mrss_category.rb +11 -0
  13. data/lib/feedzirra/parser/mrss_content.rb +48 -0
  14. data/lib/feedzirra/parser/mrss_copyright.rb +10 -0
  15. data/lib/feedzirra/parser/mrss_credit.rb +11 -0
  16. data/lib/feedzirra/parser/mrss_group.rb +37 -0
  17. data/lib/feedzirra/parser/mrss_hash.rb +10 -0
  18. data/lib/feedzirra/parser/mrss_player.rb +11 -0
  19. data/lib/feedzirra/parser/mrss_rating.rb +10 -0
  20. data/lib/feedzirra/parser/mrss_restriction.rb +11 -0
  21. data/lib/feedzirra/parser/mrss_text.rb +13 -0
  22. data/lib/feedzirra/parser/mrss_thumbnail.rb +11 -0
  23. data/lib/feedzirra/parser/rss.rb +83 -0
  24. data/lib/feedzirra/parser/rss_entry.rb +83 -0
  25. data/lib/feedzirra/parser/rss_image.rb +15 -0
  26. data/lib/feedzirra.rb +44 -0
  27. data/spec/benchmarks/feed_benchmarks.rb +98 -0
  28. data/spec/benchmarks/feedzirra_benchmarks.rb +40 -0
  29. data/spec/benchmarks/fetching_benchmarks.rb +28 -0
  30. data/spec/benchmarks/parsing_benchmark.rb +30 -0
  31. data/spec/benchmarks/updating_benchmarks.rb +33 -0
  32. data/spec/feedzirra/feed_entry_utilities_spec.rb +52 -0
  33. data/spec/feedzirra/feed_spec.rb +546 -0
  34. data/spec/feedzirra/feed_utilities_spec.rb +149 -0
  35. data/spec/feedzirra/parser/atom_entry_spec.rb +49 -0
  36. data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +42 -0
  37. data/spec/feedzirra/parser/atom_feed_burner_spec.rb +39 -0
  38. data/spec/feedzirra/parser/atom_spec.rb +43 -0
  39. data/spec/feedzirra/parser/mrss_content_spec.rb +32 -0
  40. data/spec/feedzirra/parser/rss_entry_spec.rb +154 -0
  41. data/spec/feedzirra/parser/rss_spec.rb +93 -0
  42. data/spec/sample_feeds/run_against_sample.rb +20 -0
  43. data/spec/spec_helper.rb +62 -0
  44. metadata +154 -0
@@ -0,0 +1,149 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
+ describe Feedzirra::FeedUtilities do
4
+ before(:each) do
5
+ @klass = Class.new do
6
+ include Feedzirra::FeedUtilities
7
+ end
8
+ end
9
+
10
+ describe "instance methods" do
11
+ it "should provide an updated? accessor" do
12
+ feed = @klass.new
13
+ feed.should_not be_updated
14
+ feed.updated = true
15
+ feed.should be_updated
16
+ end
17
+
18
+ it "should provide a new_entries accessor" do
19
+ feed = @klass.new
20
+ feed.new_entries.should == []
21
+ feed.new_entries = [:foo]
22
+ feed.new_entries.should == [:foo]
23
+ end
24
+
25
+ it "should provide an etag accessor" do
26
+ feed = @klass.new
27
+ feed.etag = "foo"
28
+ feed.etag.should == "foo"
29
+ end
30
+
31
+ it "should provide a last_modified accessor" do
32
+ feed = @klass.new
33
+ time = Time.now
34
+ feed.last_modified = time
35
+ feed.last_modified.should == time
36
+ end
37
+
38
+ it "should return new_entries? as true when entries are put into new_entries" do
39
+ feed = @klass.new
40
+ feed.new_entries << :foo
41
+ feed.should have_new_entries
42
+ end
43
+
44
+ it "should return a last_modified value from the entry with the most recent published date if the last_modified date hasn't been set" do
45
+ feed = Feedzirra::Parser::Atom.new
46
+ entry =Feedzirra::Parser::AtomEntry.new
47
+ entry.published = Time.now.to_s
48
+ feed.entries << entry
49
+ feed.last_modified.should == entry.published
50
+ end
51
+
52
+ it "should not throw an error if one of the entries has published date of nil" do
53
+ feed = Feedzirra::Parser::Atom.new
54
+ entry = Feedzirra::Parser::AtomEntry.new
55
+ entry.published = Time.now.to_s
56
+ feed.entries << entry
57
+ feed.entries << Feedzirra::Parser::AtomEntry.new
58
+ feed.last_modified.should == entry.published
59
+ end
60
+ end
61
+
62
+ describe "#update_from_feed" do
63
+ describe "updating feed attributes" do
64
+ before(:each) do
65
+ # I'm using the Atom class when I know I should be using a different one. However, this update_from_feed
66
+ # method would only be called against a feed item.
67
+ @feed = Feedzirra::Parser::Atom.new
68
+ @feed.title = "A title"
69
+ @feed.url = "http://pauldix.net"
70
+ @feed.feed_url = "http://feeds.feedburner.com/PaulDixExplainsNothing"
71
+ @feed.updated = false
72
+ @updated_feed = @feed.dup
73
+ end
74
+
75
+ it "should update the title if changed" do
76
+ @updated_feed.title = "new title"
77
+ @feed.update_from_feed(@updated_feed)
78
+ @feed.title.should == @updated_feed.title
79
+ @feed.should be_updated
80
+ end
81
+
82
+ it "should not update the title if the same" do
83
+ @feed.update_from_feed(@updated_feed)
84
+ @feed.should_not be_updated
85
+ end
86
+
87
+ it "should update the feed_url if changed" do
88
+ @updated_feed.feed_url = "a new feed url"
89
+ @feed.update_from_feed(@updated_feed)
90
+ @feed.feed_url.should == @updated_feed.feed_url
91
+ @feed.should be_updated
92
+ end
93
+
94
+ it "should not update the feed_url if the same" do
95
+ @feed.update_from_feed(@updated_feed)
96
+ @feed.should_not be_updated
97
+ end
98
+
99
+ it "should update the url if changed" do
100
+ @updated_feed.url = "a new url"
101
+ @feed.update_from_feed(@updated_feed)
102
+ @feed.url.should == @updated_feed.url
103
+ end
104
+
105
+ it "should not update the url if not changed" do
106
+ @feed.update_from_feed(@updated_feed)
107
+ @feed.should_not be_updated
108
+ end
109
+ end
110
+
111
+ describe "updating entries" do
112
+ before(:each) do
113
+ # I'm using the Atom class when I know I should be using a different one. However, this update_from_feed
114
+ # method would only be called against a feed item.
115
+ @feed = Feedzirra::Parser::Atom.new
116
+ @feed.title = "A title"
117
+ @feed.url = "http://pauldix.net"
118
+ @feed.feed_url = "http://feeds.feedburner.com/PaulDixExplainsNothing"
119
+ @feed.updated = false
120
+ @updated_feed = @feed.dup
121
+ @old_entry = Feedzirra::Parser::AtomEntry.new
122
+ @old_entry.url = "http://pauldix.net/old.html"
123
+ @old_entry.published = Time.now.to_s
124
+ @new_entry = Feedzirra::Parser::AtomEntry.new
125
+ @new_entry.url = "http://pauldix.net/new.html"
126
+ @new_entry.published = (Time.now + 10).to_s
127
+ @feed.entries << @old_entry
128
+ @updated_feed.entries << @new_entry
129
+ @updated_feed.entries << @old_entry
130
+ end
131
+
132
+ it "should update last-modified from the latest entry date" do
133
+ @feed.update_from_feed(@updated_feed)
134
+ @feed.last_modified.should == @new_entry.published
135
+ end
136
+
137
+ it "should put new entries into new_entries" do
138
+ @feed.update_from_feed(@updated_feed)
139
+ @feed.new_entries.should == [@new_entry]
140
+ end
141
+
142
+ it "should also put new entries into the entries collection" do
143
+ @feed.update_from_feed(@updated_feed)
144
+ @feed.entries.should include(@new_entry)
145
+ @feed.entries.should include(@old_entry)
146
+ end
147
+ end
148
+ end
149
+ end
@@ -0,0 +1,49 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::AtomEntry do
4
+ before(:each) do
5
+ # I don't really like doing it this way because these unit tests should only rely on AtomEntry,
6
+ # but this is actually how it should work. You would never just pass entry xml straight to the AtomEntry.
7
+ @entry = Feedzirra::Parser::Atom.parse(sample_atom_feed).entries.first
8
+ end
9
+
10
+ it "should parse the title" do
11
+ @entry.title.should == "AWS Job: Architect & Designer Position in Turkey"
12
+ end
13
+
14
+ it "should parse the url" do
15
+ @entry.url.should == "http://aws.typepad.com/aws/2009/01/aws-job-architect-designer-position-in-turkey.html"
16
+ end
17
+
18
+ it "should parse the url even when" do
19
+ Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).entries.first.url.should == "http://www.innoq.com/blog/phaus/2009/07/ja.html"
20
+ end
21
+
22
+ it "should parse the author" do
23
+ @entry.author.should == "AWS Editor"
24
+ end
25
+
26
+ it "should parse the content" do
27
+ @entry.content.should == sample_atom_entry_content
28
+ end
29
+
30
+ it "should provide a summary" do
31
+ @entry.summary.should == "Late last year an entrepreneur from Turkey visited me at Amazon HQ in Seattle. We talked about his plans to use AWS as part of his new social video portal startup. I won't spill any beans before he's ready to..."
32
+ end
33
+
34
+ it "should parse the published date" do
35
+ @entry.published.to_s.should == "Fri Jan 16 18:21:00 UTC 2009"
36
+ end
37
+
38
+ it "should parse the categories" do
39
+ @entry.categories.should == ['Turkey', 'Seattle']
40
+ end
41
+
42
+ it "should parse the updated date" do
43
+ @entry.updated.to_s.should == "Fri Jan 16 18:21:00 UTC 2009"
44
+ end
45
+
46
+ it "should parse the id" do
47
+ @entry.id.should == "tag:typepad.com,2003:post-61484736"
48
+ end
49
+ end
@@ -0,0 +1,42 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::AtomFeedBurnerEntry do
4
+ before(:each) do
5
+ # I don't really like doing it this way because these unit tests should only rely on AtomFeedBurnerEntry,
6
+ # but this is actually how it should work. You would never just pass entry xml straight to the AtomFeedBurnerEntry.
7
+ @entry = Feedzirra::Parser::AtomFeedBurner.parse(sample_feedburner_atom_feed).entries.first
8
+ end
9
+
10
+ it "should parse the title" do
11
+ @entry.title.should == "Making a Ruby C library even faster"
12
+ end
13
+
14
+ it "should be able to fetch a url via the 'alternate' rel if no origLink exists" do
15
+ entry = Feedzirra::Parser::AtomFeedBurner.parse(File.read("#{File.dirname(__FILE__)}/../../sample_feeds/PaulDixExplainsNothingAlternate.xml")).entries.first
16
+ entry.url.should == 'http://feeds.feedburner.com/~r/PaulDixExplainsNothing/~3/519925023/making-a-ruby-c-library-even-faster.html'
17
+ end
18
+
19
+ it "should parse the url" do
20
+ @entry.url.should == "http://www.pauldix.net/2009/01/making-a-ruby-c-library-even-faster.html"
21
+ end
22
+
23
+ it "should parse the author" do
24
+ @entry.author.should == "Paul Dix"
25
+ end
26
+
27
+ it "should parse the content" do
28
+ @entry.content.should == sample_feedburner_atom_entry_content
29
+ end
30
+
31
+ it "should provide a summary" do
32
+ @entry.summary.should == "Last week I released the first version of a SAX based XML parsing library called SAX-Machine. It uses Nokogiri, which uses libxml, so it's pretty fast. However, I felt that it could be even faster. The only question was how..."
33
+ end
34
+
35
+ it "should parse the published date" do
36
+ @entry.published.to_s.should == "Thu Jan 22 15:50:22 UTC 2009"
37
+ end
38
+
39
+ it "should parse the categories" do
40
+ @entry.categories.should == ['Ruby', 'Another Category']
41
+ end
42
+ end
@@ -0,0 +1,39 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::AtomFeedBurner do
4
+ describe "#will_parse?" do
5
+ it "should return true for a feedburner atom feed" do
6
+ Feedzirra::Parser::AtomFeedBurner.should be_able_to_parse(sample_feedburner_atom_feed)
7
+ end
8
+
9
+ it "should return false for an rdf feed" do
10
+ Feedzirra::Parser::AtomFeedBurner.should_not be_able_to_parse(sample_rdf_feed)
11
+ end
12
+
13
+ it "should return false for a regular atom feed" do
14
+ Feedzirra::Parser::AtomFeedBurner.should_not be_able_to_parse(sample_atom_feed)
15
+ end
16
+ end
17
+
18
+ describe "parsing" do
19
+ before(:each) do
20
+ @feed = Feedzirra::Parser::AtomFeedBurner.parse(sample_feedburner_atom_feed)
21
+ end
22
+
23
+ it "should parse the title" do
24
+ @feed.title.should == "Paul Dix Explains Nothing"
25
+ end
26
+
27
+ it "should parse the url" do
28
+ @feed.url.should == "http://www.pauldix.net/"
29
+ end
30
+
31
+ it "should parse the feed_url" do
32
+ @feed.feed_url.should == "http://feeds.feedburner.com/PaulDixExplainsNothing"
33
+ end
34
+
35
+ it "should parse entries" do
36
+ @feed.entries.size.should == 5
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,43 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::Atom do
4
+ describe "#will_parse?" do
5
+ it "should return true for an atom feed" do
6
+ Feedzirra::Parser::Atom.should be_able_to_parse(sample_atom_feed)
7
+ end
8
+
9
+ it "should return false for an rdf feed" do
10
+ Feedzirra::Parser::Atom.should_not be_able_to_parse(sample_rdf_feed)
11
+ end
12
+ end
13
+
14
+ describe "parsing" do
15
+ before(:each) do
16
+ @feed = Feedzirra::Parser::Atom.parse(sample_atom_feed)
17
+ end
18
+
19
+ it "should parse the title" do
20
+ @feed.title.should == "Amazon Web Services Blog"
21
+ end
22
+
23
+ it "should parse the url" do
24
+ @feed.url.should == "http://aws.typepad.com/aws/"
25
+ end
26
+
27
+ it "should parse the url even when it doesn't have the type='text/html' attribute" do
28
+ Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).url.should == "http://www.innoq.com/planet/"
29
+ end
30
+
31
+ it "should parse the feed_url even when it doesn't have the type='application/atom+xml' attribute" do
32
+ Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).feed_url.should == "http://www.innoq.com/planet/atom.xml"
33
+ end
34
+
35
+ it "should parse the feed_url" do
36
+ @feed.feed_url.should == "http://aws.typepad.com/aws/atom.xml"
37
+ end
38
+
39
+ it "should parse entries" do
40
+ @feed.entries.size.should == 10
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,32 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::RSSEntry::MRSSContent do
4
+ before(:each) do
5
+ # I don't really like doing it this way because these unit tests should only rely on RSSEntry,
6
+ # but this is actually how it should work. You would never just pass entry xml straight to the RSSEntry.
7
+ @entries = Feedzirra::Parser::RSS.parse(sample_mrss_feed).entries
8
+ end
9
+
10
+ it "should parse the media" do
11
+ entry = @entries.first
12
+ entry.media_content.size.should == 1
13
+ entry.media_description.should == 'The story began with a July 23 article in a local newspaper, The Independent. Jenna Hewitt, 26, of Montauk, and three friends said they found the ...'
14
+ entry.media_thumbnail.should == 'http://3.gvt0.com/vi/Y3rNEu4A8WM/default.jpg'
15
+ entry.media_thumbnail_width.should == '320'
16
+ entry.media_thumbnail_height.should == '240'
17
+ end
18
+
19
+ it "should handle multiple pieces of content" do
20
+ media = @entries[1].media_content
21
+ media.size.should == 2
22
+ media[0].url.should == 'http://www.youtube.com/v/pvaM6sjLbuA&#38;fs=1'
23
+ media[0].content_type.should == 'application/x-shockwave-flash'
24
+ media[0].medium.should == 'video'
25
+ media[0].duration.should == '575'
26
+
27
+ media[1].url.should == 'http://www.youtube.com/v/pvaM6sjLbuA&#38;fs=2'
28
+ media[1].content_type.should == 'video/mp4'
29
+ media[1].medium.should == 'video'
30
+ media[1].duration.should == '576'
31
+ end
32
+ end
@@ -0,0 +1,154 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::RSSEntry do
4
+ before do
5
+ # I don't really like doing it this way because these unit tests should only rely on RSSEntry,
6
+ # but this is actually how it should work. You would never just pass entry xml straight to the RSSEntry.
7
+ @entry = Feedzirra::Parser::RSS.parse(sample_rss_feed).entries.first
8
+ end
9
+
10
+ it "should parse the title" do
11
+ @entry.title.should == "Nokogiri’s Slop Feature"
12
+ end
13
+
14
+ it "should parse the url" do
15
+ @entry.url.should == "http://tenderlovemaking.com/2008/12/04/nokogiris-slop-feature/"
16
+ end
17
+
18
+ it "should parse the author" do
19
+ @entry.author.should == "Aaron Patterson"
20
+ end
21
+
22
+ it "should parse the content" do
23
+ @entry.content.should == sample_rss_entry_content
24
+ end
25
+
26
+ it "should provide a summary" do
27
+ @entry.summary.should == "Oops! When I released nokogiri version 1.0.7, I totally forgot to talk about Nokogiri::Slop() feature that was added. Why is it called \"slop\"? It lets you sloppily explore documents. Basically, it decorates your document with method_missing() that allows you to search your document via method calls.\nGiven this document:\n\ndoc = Nokogiri::Slop&#40;&#60;&#60;-eohtml&#41;\n&#60;html&#62;\n&#160; &#60;body&#62;\n&#160; [...]"
28
+ end
29
+
30
+ it "should parse the published date" do
31
+ @entry.published.to_s.should == "Thu Dec 04 17:17:49 UTC 2008"
32
+ end
33
+
34
+ it "should parse the categories" do
35
+ @entry.categories.should == ['computadora', 'nokogiri', 'rails']
36
+ end
37
+
38
+ it "should parse the guid as id" do
39
+ @entry.id.should == "http://tenderlovemaking.com/?p=198"
40
+ end
41
+
42
+ describe "parsing an iTunes feed" do
43
+ before do
44
+ @item = Feedzirra::Parser::RSS.parse(sample_itunes_feed).entries.first
45
+ end
46
+
47
+ it "should parse the title" do
48
+ @item.title.should == "Shake Shake Shake Your Spices"
49
+ end
50
+
51
+ it "should parse the author" do
52
+ @item.author.should == "John Doe"
53
+ end
54
+
55
+ it "should parse the subtitle" do
56
+ @item.subtitle.should == "A short primer on table spices"
57
+ end
58
+
59
+ it "should parse the summary" do
60
+ @item.summary.should == "This week we talk about salt and pepper shakers, comparing and contrasting pour rates, construction materials, and overall aesthetics. Come and join the party!"
61
+ end
62
+
63
+ it "should parse the enclosure" do
64
+ @item.enclosure_length.should == "8727310"
65
+ @item.enclosure_type.should == "audio/x-m4a"
66
+ @item.enclosure_url.should == "http://example.com/podcasts/everything/AllAboutEverythingEpisode3.m4a"
67
+ end
68
+
69
+ it "should parse the id" do
70
+ @item.id.should == "http://example.com/podcasts/archive/aae20050615.m4a"
71
+ end
72
+
73
+ it "should parse the published date" do
74
+ @item.published.should == Time.parse('Wed Jun 15 19:00:00 UTC 2005')
75
+ end
76
+
77
+ it "should parse the duration" do
78
+ @item.duration.should == "7:04"
79
+ end
80
+
81
+ it "should parse the keywords" do
82
+ @item.keywords.should == "salt, pepper, shaker, exciting"
83
+ end
84
+ end
85
+
86
+ describe "parsing Media RSS" do
87
+ before do
88
+ @item = Feedzirra::Parser::RSS.parse(sample_mrss_feed).entries.first
89
+ end
90
+
91
+ it "should parse media:rating" do
92
+ @item.rating.should == 'adult'
93
+ @item.rating_scheme.should == 'urn:simple'
94
+ end
95
+
96
+ it "should parse media:title" do
97
+ @item.media_title.should == 'The Montauk Monster-Hells Visits New York!'
98
+ end
99
+
100
+ it "should parse media:description" do
101
+ @item.media_description.should == 'The story began with a July 23 article in a local newspaper, The Independent. Jenna Hewitt, 26, of Montauk, and three friends said they found the ...'
102
+ end
103
+
104
+ it "should parse media:keywords" do
105
+ @item.media_keywords.should == 'kitty, cat, big dog, yarn, fluffy'
106
+ end
107
+
108
+ it "should parse media:tumbnail" do
109
+ @item.media_content.size.should == 1
110
+ @item.media_description.should == 'The story began with a July 23 article in a local newspaper, The Independent. Jenna Hewitt, 26, of Montauk, and three friends said they found the ...'
111
+ @item.media_thumbnail.should == 'http://3.gvt0.com/vi/Y3rNEu4A8WM/default.jpg'
112
+ @item.media_thumbnail_width.should == '320'
113
+ @item.media_thumbnail_height.should == '240'
114
+ end
115
+
116
+ it "should parse media:category" do
117
+ @item.media_category.should == 'Arts/Movies/Titles/A/Ace_Ventura_Series/Ace_Ventura_-_Pet_Detective'
118
+ @item.media_category_scheme.should == 'http://dmoz.org'
119
+ @item.media_category_label.should == 'Ace Ventura - Pet Detective'
120
+ end
121
+
122
+ it "should parse media:hash" do
123
+ @item.media_hash.should == 'dfdec888b72151965a34b4b59031290a'
124
+ @item.media_hash_algo.should == 'md5'
125
+ end
126
+
127
+ it "should parse media:player" do
128
+ @item.media_player_url.should == 'http://www.example.com/player?id=1111'
129
+ @item.media_player_width.should == '400'
130
+ @item.media_player_height.should == '200'
131
+ end
132
+
133
+ it "should parse media:credit" do
134
+ @item.credits.size.should == 2
135
+ @item.credits.first.role.should == 'producer'
136
+ @item.credits.first.scheme.should == 'urn:ebu'
137
+ pending 'not sure why the name isn\'t getting set'
138
+ @item.credits.first.name.should == 'John Doe'
139
+ end
140
+
141
+ it "should parse media:copyright" do
142
+ @item.copyright.should == '2009 Example Co.'
143
+ @item.copyright_url.should == 'http://example.com/copyright.html'
144
+ end
145
+
146
+ it "should parse media:restriction" do
147
+ pending 'need to figure out why this is getting String'
148
+ @item.media_restriction.type.should == 'MRSSRestriction'
149
+ @item.media_restriction.value.should == 'au us'
150
+ @item.media_restriction.scope.should == 'country'
151
+ @item.media_restriction.relationship.should == 'allow'
152
+ end
153
+ end
154
+ end
@@ -0,0 +1,93 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::RSS do
4
+ describe "#will_parse?" do
5
+ it "should return true for an RSS feed" do
6
+ Feedzirra::Parser::RSS.should be_able_to_parse(sample_rss_feed)
7
+ end
8
+
9
+ it "should return true for an rdf feed" do
10
+ Feedzirra::Parser::RSS.should be_able_to_parse(sample_rdf_feed)
11
+ end
12
+
13
+ it "should return true for an iTunes feed" do
14
+ Feedzirra::Parser::RSS.should be_able_to_parse(sample_itunes_feed)
15
+ end
16
+
17
+ it "should return fase for an atom feed" do
18
+ Feedzirra::Parser::RSS.should_not be_able_to_parse(sample_atom_feed)
19
+ end
20
+ end
21
+
22
+ describe "parsing" do
23
+ before do
24
+ @feed = Feedzirra::Parser::RSS.parse(sample_mrss_feed)
25
+ end
26
+
27
+ it "should parse the title" do
28
+ @feed.title.should == "Google Video - Hot videos"
29
+ end
30
+
31
+ it "should parse the url" do
32
+ @feed.url.should == "http://video.google.com/"
33
+ end
34
+
35
+ it "should provide an accessor for the feed_url" do
36
+ @feed.respond_to?(:feed_url).should == true
37
+ @feed.respond_to?(:feed_url=).should == true
38
+ end
39
+
40
+ it "should parse entries" do
41
+ @feed.entries.size.should == 20
42
+ end
43
+
44
+ it "should parse the image" do
45
+ pending 'setting NilClass for some reason'
46
+ @feed.image.class.should == 'RSSImage'
47
+ @feed.image.title.should == 'Google Video - Hot videos'
48
+ @feed.image.link.should == 'http://video.google.com/'
49
+ @feed.image.url.should == 'http://video.google.com/common/google_logo_small.jpg'
50
+ @feed.image.width.should == '100'
51
+ @feed.image.height.should == '37'
52
+ end
53
+
54
+ describe "parsing an iTunes feed" do
55
+ before do
56
+ @feed = Feedzirra::Parser::RSS.parse(sample_itunes_feed)
57
+ end
58
+
59
+ it "should parse an image" do
60
+ @feed.image.should == "http://example.com/podcasts/everything/AllAboutEverything.jpg"
61
+ end
62
+
63
+ it "should parse categories" do
64
+ @feed.categories.size == 2
65
+ @feed.categories[0].should == "Technology"
66
+ @feed.categories[1].should == "Gadgets"
67
+ @feed.categories[2].should == "TV &#38; Film"
68
+
69
+ # @feed.categories[0].name.should == "Technology"
70
+ # @feed.categories[0].sub_categories.size.should == 1
71
+ # @feed.categories[0].sub_categories[0].should == "Gadgets"
72
+ # @feed.categories[1].name.should == "TV &amp; Film"
73
+ # @feed.categories[1].sub_categories.size.should == 0
74
+ end
75
+
76
+ it "should parse the summary" do
77
+ @feed.summary.should == "All About Everything is a show about everything. Each week we dive into any subject known to man and talk about it as much as we can. Look for our Podcast in the iTunes Music Store"
78
+ end
79
+
80
+ it "should parse entries" do
81
+ @feed.entries.size.should == 4
82
+ end
83
+
84
+ it "should parse the owner name" do
85
+ @feed.owner_name.should == 'John Doe'
86
+ end
87
+
88
+ it "should parse the owner email" do
89
+ @feed.owner_email.should == 'john.doe@example.com'
90
+ end
91
+ end
92
+ end
93
+ end
@@ -0,0 +1,20 @@
1
+ require 'rubygems'
2
+ require File.dirname(__FILE__) + "/../../lib/feedzirra.rb"
3
+
4
+ feed_urls = File.readlines(File.dirname(__FILE__) + "/top5kfeeds.dat").collect {|line| line.split.first}
5
+
6
+ success = lambda do |url, feed|
7
+ puts "SUCCESS - #{feed.title} - #{url}"
8
+ end
9
+
10
+ failed_feeds = []
11
+ failure = lambda do |url, response_code, header, body|
12
+ failed_feeds << url if response_code == 200
13
+ puts "*********** FAILED with #{response_code} on #{url}"
14
+ end
15
+
16
+ Feedzirra::Feed.fetch_and_parse(feed_urls, :on_success => success, :on_failure => failure)
17
+
18
+ File.open("./failed_urls.txt", "w") do |f|
19
+ f.write failed_feeds.join("\n")
20
+ end
@@ -0,0 +1,62 @@
1
+ require "rubygems"
2
+ require "spec"
3
+
4
+ # gem install redgreen for colored test output
5
+ begin require "redgreen" unless ENV['TM_CURRENT_LINE']; rescue LoadError; end
6
+
7
+ path = File.expand_path(File.dirname(__FILE__) + "/../lib/")
8
+ $LOAD_PATH.unshift(path) unless $LOAD_PATH.include?(path)
9
+
10
+ require "feedzirra" # resolved through the lib/ directory added to $LOAD_PATH above, so it no longer depends on the current working directory
11
+
12
+ def load_sample(filename)
13
+ File.read("#{File.dirname(__FILE__)}/sample_feeds/#{filename}")
14
+ end
15
+
16
+ def sample_atom_feed
17
+ load_sample("AmazonWebServicesBlog.xml")
18
+ end
19
+
20
+ def sample_atom_entry_content
21
+ load_sample("AmazonWebServicesBlogFirstEntryContent.xml")
22
+ end
23
+
24
+ def sample_itunes_feed
25
+ load_sample("itunes.xml")
26
+ end
27
+
28
+ def sample_rdf_feed
29
+ load_sample("HREFConsideredHarmful.xml")
30
+ end
31
+
32
+ def sample_rdf_entry_content
33
+ load_sample("HREFConsideredHarmfulFirstEntry.xml")
34
+ end
35
+
36
+ def sample_rss_feed_burner_feed
37
+ load_sample("SamHarrisAuthorPhilosopherEssayistAtheist.xml")
38
+ end
39
+
40
+ def sample_rss_feed
41
+ load_sample("TenderLovemaking.xml")
42
+ end
43
+
44
+ def sample_rss_entry_content
45
+ load_sample("TenderLovemakingFirstEntry.xml")
46
+ end
47
+
48
+ def sample_feedburner_atom_feed
49
+ load_sample("PaulDixExplainsNothing.xml")
50
+ end
51
+
52
+ def sample_feedburner_atom_entry_content
53
+ load_sample("PaulDixExplainsNothingFirstEntryContent.xml")
54
+ end
55
+
56
+ def sample_wfw_feed
57
+ load_sample("PaulDixExplainsNothingWFW.xml")
58
+ end
59
+
60
+ def sample_mrss_feed
61
+ load_sample("GoogleVideoSample.xml")
62
+ end