jashmenn-feedzirra 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. data/.rspec +1 -0
  2. data/README.rdoc +177 -0
  3. data/Rakefile +51 -0
  4. data/lib/feedzirra.rb +20 -0
  5. data/lib/feedzirra/core_ext.rb +3 -0
  6. data/lib/feedzirra/core_ext/date.rb +19 -0
  7. data/lib/feedzirra/core_ext/string.rb +9 -0
  8. data/lib/feedzirra/feed.rb +384 -0
  9. data/lib/feedzirra/feed_entry_utilities.rb +65 -0
  10. data/lib/feedzirra/feed_utilities.rb +61 -0
  11. data/lib/feedzirra/parser.rb +20 -0
  12. data/lib/feedzirra/parser/atom.rb +29 -0
  13. data/lib/feedzirra/parser/atom_entry.rb +30 -0
  14. data/lib/feedzirra/parser/atom_feed_burner.rb +21 -0
  15. data/lib/feedzirra/parser/atom_feed_burner_entry.rb +31 -0
  16. data/lib/feedzirra/parser/google_docs_atom.rb +28 -0
  17. data/lib/feedzirra/parser/google_docs_atom_entry.rb +29 -0
  18. data/lib/feedzirra/parser/itunes_rss.rb +50 -0
  19. data/lib/feedzirra/parser/itunes_rss_item.rb +32 -0
  20. data/lib/feedzirra/parser/itunes_rss_owner.rb +12 -0
  21. data/lib/feedzirra/parser/rss.rb +22 -0
  22. data/lib/feedzirra/parser/rss_entry.rb +34 -0
  23. data/lib/feedzirra/parser/rss_feed_burner.rb +22 -0
  24. data/lib/feedzirra/parser/rss_feed_burner_entry.rb +40 -0
  25. data/lib/feedzirra/version.rb +3 -0
  26. data/spec/benchmarks/feed_benchmarks.rb +98 -0
  27. data/spec/benchmarks/feedzirra_benchmarks.rb +40 -0
  28. data/spec/benchmarks/fetching_benchmarks.rb +28 -0
  29. data/spec/benchmarks/parsing_benchmark.rb +30 -0
  30. data/spec/benchmarks/updating_benchmarks.rb +33 -0
  31. data/spec/feedzirra/feed_entry_utilities_spec.rb +52 -0
  32. data/spec/feedzirra/feed_spec.rb +597 -0
  33. data/spec/feedzirra/feed_utilities_spec.rb +152 -0
  34. data/spec/feedzirra/parser/atom_entry_spec.rb +86 -0
  35. data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +47 -0
  36. data/spec/feedzirra/parser/atom_feed_burner_spec.rb +47 -0
  37. data/spec/feedzirra/parser/atom_spec.rb +51 -0
  38. data/spec/feedzirra/parser/google_docs_atom_entry_spec.rb +22 -0
  39. data/spec/feedzirra/parser/google_docs_atom_spec.rb +31 -0
  40. data/spec/feedzirra/parser/itunes_rss_item_spec.rb +48 -0
  41. data/spec/feedzirra/parser/itunes_rss_owner_spec.rb +18 -0
  42. data/spec/feedzirra/parser/itunes_rss_spec.rb +54 -0
  43. data/spec/feedzirra/parser/rss_entry_spec.rb +85 -0
  44. data/spec/feedzirra/parser/rss_feed_burner_entry_spec.rb +85 -0
  45. data/spec/feedzirra/parser/rss_feed_burner_spec.rb +52 -0
  46. data/spec/feedzirra/parser/rss_spec.rb +49 -0
  47. data/spec/sample_feeds/run_against_sample.rb +20 -0
  48. data/spec/spec_helper.rb +78 -0
  49. metadata +228 -0
@@ -0,0 +1,152 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+ require 'rubygems'
3
+ require 'active_support'
4
+
5
+ describe Feedzirra::FeedUtilities do
6
+ before(:each) do
7
+ @klass = Class.new do
8
+ include Feedzirra::FeedUtilities
9
+ end
10
+ end
11
+
12
+ describe "instance methods" do
13
+ it "should provide an updated? accessor" do
14
+ feed = @klass.new
15
+ feed.should_not be_updated
16
+ feed.updated = true
17
+ feed.should be_updated
18
+ end
19
+
20
+ it "should provide a new_entries accessor" do
21
+ feed = @klass.new
22
+ feed.new_entries.should == []
23
+ feed.new_entries = [:foo]
24
+ feed.new_entries.should == [:foo]
25
+ end
26
+
27
+ it "should provide an etag accessor" do
28
+ feed = @klass.new
29
+ feed.etag = "foo"
30
+ feed.etag.should == "foo"
31
+ end
32
+
33
+ it "should provide a last_modified accessor" do
34
+ feed = @klass.new
35
+ time = Time.now
36
+ feed.last_modified = time
37
+ feed.last_modified.should == time
38
+ feed.last_modified.class.should == Time
39
+ end
40
+
41
+ it "should return new_entries? as true when entries are put into new_entries" do
42
+ feed = @klass.new
43
+ feed.new_entries << :foo
44
+ feed.should have_new_entries
45
+ end
46
+
47
+ it "should return a last_modified value from the entry with the most recent published date if the last_modified date hasn't been set" do
48
+ feed = Feedzirra::Parser::Atom.new
49
+ entry =Feedzirra::Parser::AtomEntry.new
50
+ entry.published = Time.now.to_s
51
+ feed.entries << entry
52
+ feed.last_modified.should == entry.published
53
+ end
54
+
55
+ it "should not throw an error if one of the entries has published date of nil" do
56
+ feed = Feedzirra::Parser::Atom.new
57
+ entry = Feedzirra::Parser::AtomEntry.new
58
+ entry.published = Time.now.to_s
59
+ feed.entries << entry
60
+ feed.entries << Feedzirra::Parser::AtomEntry.new
61
+ feed.last_modified.should == entry.published
62
+ end
63
+ end
64
+
65
+ describe "#update_from_feed" do
66
+ describe "updating feed attributes" do
67
+ before(:each) do
68
+ # I'm using the Atom class when I know I should be using a different one. However, this update_from_feed
69
+ # method would only be called against a feed item.
70
+ @feed = Feedzirra::Parser::Atom.new
71
+ @feed.title = "A title"
72
+ @feed.url = "http://pauldix.net"
73
+ @feed.feed_url = "http://feeds.feedburner.com/PaulDixExplainsNothing"
74
+ @feed.updated = false
75
+ @updated_feed = @feed.dup
76
+ end
77
+
78
+ it "should update the title if changed" do
79
+ @updated_feed.title = "new title"
80
+ @feed.update_from_feed(@updated_feed)
81
+ @feed.title.should == @updated_feed.title
82
+ @feed.should be_updated
83
+ end
84
+
85
+ it "should not update the title if the same" do
86
+ @feed.update_from_feed(@updated_feed)
87
+ @feed.should_not be_updated
88
+ end
89
+
90
+ it "should update the feed_url if changed" do
91
+ @updated_feed.feed_url = "a new feed url"
92
+ @feed.update_from_feed(@updated_feed)
93
+ @feed.feed_url.should == @updated_feed.feed_url
94
+ @feed.should be_updated
95
+ end
96
+
97
+ it "should not update the feed_url if the same" do
98
+ @feed.update_from_feed(@updated_feed)
99
+ @feed.should_not be_updated
100
+ end
101
+
102
+ it "should update the url if changed" do
103
+ @updated_feed.url = "a new url"
104
+ @feed.update_from_feed(@updated_feed)
105
+ @feed.url.should == @updated_feed.url
106
+ end
107
+
108
+ it "should not update the url if not changed" do
109
+ @feed.update_from_feed(@updated_feed)
110
+ @feed.should_not be_updated
111
+ end
112
+ end
113
+
114
+ describe "updating entries" do
115
+ before(:each) do
116
+ # I'm using the Atom class when I know I should be using a different one. However, this update_from_feed
117
+ # method would only be called against a feed item.
118
+ @feed = Feedzirra::Parser::Atom.new
119
+ @feed.title = "A title"
120
+ @feed.url = "http://pauldix.net"
121
+ @feed.feed_url = "http://feeds.feedburner.com/PaulDixExplainsNothing"
122
+ @feed.updated = false
123
+ @updated_feed = @feed.dup
124
+ @old_entry = Feedzirra::Parser::AtomEntry.new
125
+ @old_entry.url = "http://pauldix.net/old.html"
126
+ @old_entry.published = Time.now.to_s
127
+ @new_entry = Feedzirra::Parser::AtomEntry.new
128
+ @new_entry.url = "http://pauldix.net/new.html"
129
+ @new_entry.published = (Time.now + 10).to_s
130
+ @feed.entries << @old_entry
131
+ @updated_feed.entries << @new_entry
132
+ @updated_feed.entries << @old_entry
133
+ end
134
+
135
+ it "should update last-modified from the latest entry date" do
136
+ @feed.update_from_feed(@updated_feed)
137
+ @feed.last_modified.should == @new_entry.published
138
+ end
139
+
140
+ it "should put new entries into new_entries" do
141
+ @feed.update_from_feed(@updated_feed)
142
+ @feed.new_entries.should == [@new_entry]
143
+ end
144
+
145
+ it "should also put new entries into the entries collection" do
146
+ @feed.update_from_feed(@updated_feed)
147
+ @feed.entries.should include(@new_entry)
148
+ @feed.entries.should include(@old_entry)
149
+ end
150
+ end
151
+ end
152
+ end
@@ -0,0 +1,86 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::AtomEntry do
4
+ before(:each) do
5
+ # I don't really like doing it this way because these unit tests should only rely on AtomEntry,
6
+ # but this is actually how it should work. You would never just pass entry xml straight to the AtomEntry
7
+ @entry = Feedzirra::Parser::Atom.parse(sample_atom_feed).entries.first
8
+ end
9
+
10
+ it "should parse the title" do
11
+ @entry.title.should == "AWS Job: Architect & Designer Position in Turkey"
12
+ end
13
+
14
+ it "should parse the url" do
15
+ @entry.url.should == "http://aws.typepad.com/aws/2009/01/aws-job-architect-designer-position-in-turkey.html"
16
+ end
17
+
18
+ it "should parse the url even when" do
19
+ Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).entries.first.url.should == "http://www.innoq.com/blog/phaus/2009/07/ja.html"
20
+ end
21
+
22
+ it "should parse the author" do
23
+ @entry.author.should == "AWS Editor"
24
+ end
25
+
26
+ it "should parse the content" do
27
+ @entry.content.should == sample_atom_entry_content
28
+ end
29
+
30
+ it "should provide a summary" do
31
+ @entry.summary.should == "Late last year an entrepreneur from Turkey visited me at Amazon HQ in Seattle. We talked about his plans to use AWS as part of his new social video portal startup. I won't spill any beans before he's ready to..."
32
+ end
33
+
34
+ it "should parse the published date" do
35
+ @entry.published.to_s.should == "Fri Jan 16 18:21:00 UTC 2009"
36
+ end
37
+
38
+ it "should parse the categories" do
39
+ @entry.categories.should == ['Turkey', 'Seattle']
40
+ end
41
+
42
+ it "should parse the updated date" do
43
+ @entry.updated.to_s.should == "Fri Jan 16 18:21:00 UTC 2009"
44
+ end
45
+
46
+ it "should parse the id" do
47
+ @entry.id.should == "tag:typepad.com,2003:post-61484736"
48
+ end
49
+
50
+ it "should support each" do
51
+ @entry.respond_to? :each
52
+ end
53
+
54
+ it "should be able to list out all fields with each" do
55
+ all_fields = []
56
+ @entry.each do |field, value|
57
+ all_fields << field
58
+ end
59
+ all_fields.sort == ['author', 'categories', 'content', 'id', 'published', 'summary', 'title', 'url']
60
+ end
61
+
62
+ it "should be able to list out all values with each" do
63
+ title_value = ''
64
+ @entry.each do |field, value|
65
+ title_value = value if field == 'title'
66
+ end
67
+ title_value.should == "AWS Job: Architect & Designer Position in Turkey"
68
+ end
69
+
70
+ it "should support checking if a field exists in the entry" do
71
+ @entry.include?('title') && @entry.include?('author')
72
+ end
73
+
74
+ it "should allow access to fields with hash syntax" do
75
+ @entry['title'] == @entry.title
76
+ @entry['title'].should == "AWS Job: Architect & Designer Position in Turkey"
77
+ @entry['author'] == @entry.author
78
+ @entry['author'].should == "AWS Editor"
79
+ end
80
+
81
+ it "should allow setting field values with hash syntax" do
82
+ @entry['title'] = "Foobar"
83
+ @entry.title.should == "Foobar"
84
+ end
85
+
86
+ end
@@ -0,0 +1,47 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::AtomFeedBurnerEntry do
4
+ before(:each) do
5
+ # I don't really like doing it this way because these unit tests should only rely on AtomFeedBurnerEntry,
6
+ # but this is actually how it should work. You would never just pass entry xml straight to the AtomFeedBurnerEntry
7
+ @entry = Feedzirra::Parser::AtomFeedBurner.parse(sample_feedburner_atom_feed).entries.first
8
+ end
9
+
10
+ it "should parse the title" do
11
+ @entry.title.should == "Making a Ruby C library even faster"
12
+ end
13
+
14
+ it "should be able to fetch a url via the 'alternate' rel if no origLink exists" do
15
+ entry = Feedzirra::Parser::AtomFeedBurner.parse(File.read("#{File.dirname(__FILE__)}/../../sample_feeds/PaulDixExplainsNothingAlternate.xml")).entries.first
16
+ entry.url.should == 'http://feeds.feedburner.com/~r/PaulDixExplainsNothing/~3/519925023/making-a-ruby-c-library-even-faster.html'
17
+ end
18
+
19
+ it "should parse the url" do
20
+ @entry.url.should == "http://www.pauldix.net/2009/01/making-a-ruby-c-library-even-faster.html"
21
+ end
22
+
23
+ it "should parse the url when there is no alternate" do
24
+ entry = Feedzirra::Parser::AtomFeedBurner.parse(File.read("#{File.dirname(__FILE__)}/../../sample_feeds/FeedBurnerUrlNoAlternate.xml")).entries.first
25
+ entry.url.should == 'http://example.com/QQQQ.html'
26
+ end
27
+
28
+ it "should parse the author" do
29
+ @entry.author.should == "Paul Dix"
30
+ end
31
+
32
+ it "should parse the content" do
33
+ @entry.content.should == sample_feedburner_atom_entry_content
34
+ end
35
+
36
+ it "should provide a summary" do
37
+ @entry.summary.should == "Last week I released the first version of a SAX based XML parsing library called SAX-Machine. It uses Nokogiri, which uses libxml, so it's pretty fast. However, I felt that it could be even faster. The only question was how..."
38
+ end
39
+
40
+ it "should parse the published date" do
41
+ @entry.published.to_s.should == "Thu Jan 22 15:50:22 UTC 2009"
42
+ end
43
+
44
+ it "should parse the categories" do
45
+ @entry.categories.should == ['Ruby', 'Another Category']
46
+ end
47
+ end
@@ -0,0 +1,47 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::AtomFeedBurner do
4
+ describe "#will_parse?" do
5
+ it "should return true for a feedburner atom feed" do
6
+ Feedzirra::Parser::AtomFeedBurner.should be_able_to_parse(sample_feedburner_atom_feed)
7
+ end
8
+
9
+ it "should return false for an rdf feed" do
10
+ Feedzirra::Parser::AtomFeedBurner.should_not be_able_to_parse(sample_rdf_feed)
11
+ end
12
+
13
+ it "should return false for a regular atom feed" do
14
+ Feedzirra::Parser::AtomFeedBurner.should_not be_able_to_parse(sample_atom_feed)
15
+ end
16
+
17
+ it "should return false for an rss feedburner feed" do
18
+ Feedzirra::Parser::AtomFeedBurner.should_not be_able_to_parse(sample_rss_feed_burner_feed)
19
+ end
20
+ end
21
+
22
+ describe "parsing" do
23
+ before(:each) do
24
+ @feed = Feedzirra::Parser::AtomFeedBurner.parse(sample_feedburner_atom_feed)
25
+ end
26
+
27
+ it "should parse the title" do
28
+ @feed.title.should == "Paul Dix Explains Nothing"
29
+ end
30
+
31
+ it "should parse the description" do
32
+ @feed.description.should == "Entrepreneurship, programming, software development, politics, NYC, and random thoughts."
33
+ end
34
+
35
+ it "should parse the url" do
36
+ @feed.url.should == "http://www.pauldix.net/"
37
+ end
38
+
39
+ it "should parse the feed_url" do
40
+ @feed.feed_url.should == "http://feeds.feedburner.com/PaulDixExplainsNothing"
41
+ end
42
+
43
+ it "should parse entries" do
44
+ @feed.entries.size.should == 5
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,51 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::Atom do
4
+ describe "#will_parse?" do
5
+ it "should return true for an atom feed" do
6
+ Feedzirra::Parser::Atom.should be_able_to_parse(sample_atom_feed)
7
+ end
8
+
9
+ it "should return false for an rdf feed" do
10
+ Feedzirra::Parser::Atom.should_not be_able_to_parse(sample_rdf_feed)
11
+ end
12
+
13
+ it "should return false for an rss feedburner feed" do
14
+ Feedzirra::Parser::Atom.should_not be_able_to_parse(sample_rss_feed_burner_feed)
15
+ end
16
+ end
17
+
18
+ describe "parsing" do
19
+ before(:each) do
20
+ @feed = Feedzirra::Parser::Atom.parse(sample_atom_feed)
21
+ end
22
+
23
+ it "should parse the title" do
24
+ @feed.title.should == "Amazon Web Services Blog"
25
+ end
26
+
27
+ it "should parse the description" do
28
+ @feed.description.should == "Amazon Web Services, Products, Tools, and Developer Information..."
29
+ end
30
+
31
+ it "should parse the url" do
32
+ @feed.url.should == "http://aws.typepad.com/aws/"
33
+ end
34
+
35
+ it "should parse the url even when it doesn't have the type='text/html' attribute" do
36
+ Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).url.should == "http://www.innoq.com/planet/"
37
+ end
38
+
39
+ it "should parse the feed_url even when it doesn't have the type='application/atom+xml' attribute" do
40
+ Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).feed_url.should == "http://www.innoq.com/planet/atom.xml"
41
+ end
42
+
43
+ it "should parse the feed_url" do
44
+ @feed.feed_url.should == "http://aws.typepad.com/aws/atom.xml"
45
+ end
46
+
47
+ it "should parse entries" do
48
+ @feed.entries.size.should == 10
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,22 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::GoogleDocsAtomEntry do
4
+ describe 'parsing' do
5
+ before do
6
+ @feed = Feedzirra::Parser::GoogleDocsAtom.parse(sample_google_docs_list_feed)
7
+ @entry = @feed.entries.first
8
+ end
9
+
10
+ it 'should have the custom checksum element' do
11
+ @entry.checksum.should eql '2b01142f7481c7b056c4b410d28f33cf'
12
+ end
13
+
14
+ it 'should have the custom filename element' do
15
+ @entry.original_filename.should eql "MyFile.pdf"
16
+ end
17
+
18
+ it 'should have the custom suggested filename element' do
19
+ @entry.suggested_filename.should eql "TaxDocument.pdf"
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,31 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::GoogleDocsAtom do
4
+ describe '.able_to_parser?' do
5
+ it 'should return true for Google Docs feed' do
6
+ Feedzirra::Parser::GoogleDocsAtom.should be_able_to_parse(sample_google_docs_list_feed)
7
+ end
8
+
9
+ it 'should not be able to parse another Atom feed' do
10
+ Feedzirra::Parser::GoogleDocsAtom.should_not be_able_to_parse(sample_atom_feed)
11
+ end
12
+ end
13
+
14
+ describe 'parsing' do
15
+ before do
16
+ @feed = Feedzirra::Parser::GoogleDocsAtom.parse(sample_google_docs_list_feed)
17
+ end
18
+
19
+ it 'should return a bunch of objects' do
20
+ @feed.entries.should_not be_empty
21
+ end
22
+
23
+ it 'should populate a title, interhited from the Atom entry' do
24
+ @feed.title.should_not be_nil
25
+ end
26
+
27
+ it 'should return a bunch of entries of type GoogleDocsAtomEntry' do
28
+ @feed.entries.first.should be_a Feedzirra::Parser::GoogleDocsAtomEntry
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,48 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::ITunesRSSItem do
4
+ before(:each) do
5
+ # I don't really like doing it this way because these unit tests should only rely on ITunesRSSItem,
6
+ # but this is actually how it should work. You would never just pass entry xml straight to the ITunesRSSItem
7
+ @item = Feedzirra::Parser::ITunesRSS.parse(sample_itunes_feed).entries.first
8
+ end
9
+
10
+ it "should parse the title" do
11
+ @item.title.should == "Shake Shake Shake Your Spices"
12
+ end
13
+
14
+ it "should parse the author" do
15
+ @item.itunes_author.should == "John Doe"
16
+ end
17
+
18
+ it "should parse the subtitle" do
19
+ @item.itunes_subtitle.should == "A short primer on table spices"
20
+ end
21
+
22
+ it "should parse the summary" do
23
+ @item.itunes_summary.should == "This week we talk about salt and pepper shakers, comparing and contrasting pour rates, construction materials, and overall aesthetics. Come and join the party!"
24
+ end
25
+
26
+ it "should parse the enclosure" do
27
+ @item.enclosure_length.should == "8727310"
28
+ @item.enclosure_type.should == "audio/x-m4a"
29
+ @item.enclosure_url.should == "http://example.com/podcasts/everything/AllAboutEverythingEpisode3.m4a"
30
+ end
31
+
32
+ it "should parse the guid" do
33
+ @item.guid.should == "http://example.com/podcasts/archive/aae20050615.m4a"
34
+ end
35
+
36
+ it "should parse the published date" do
37
+ @item.published.to_s.should == "Wed Jun 15 19:00:00 UTC 2005"
38
+ end
39
+
40
+ it "should parse the duration" do
41
+ @item.itunes_duration.should == "7:04"
42
+ end
43
+
44
+ it "should parse the keywords" do
45
+ @item.itunes_keywords.should == "salt, pepper, shaker, exciting"
46
+ end
47
+
48
+ end