localist-feedzirra 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. data/.rspec +1 -0
  2. data/README.rdoc +177 -0
  3. data/Rakefile +53 -0
  4. data/lib/feedzirra/core_ext/date.rb +19 -0
  5. data/lib/feedzirra/core_ext/string.rb +9 -0
  6. data/lib/feedzirra/core_ext.rb +3 -0
  7. data/lib/feedzirra/feed.rb +382 -0
  8. data/lib/feedzirra/feed_entry_utilities.rb +65 -0
  9. data/lib/feedzirra/feed_utilities.rb +72 -0
  10. data/lib/feedzirra/parser/atom.rb +29 -0
  11. data/lib/feedzirra/parser/atom_entry.rb +30 -0
  12. data/lib/feedzirra/parser/atom_feed_burner.rb +21 -0
  13. data/lib/feedzirra/parser/atom_feed_burner_entry.rb +31 -0
  14. data/lib/feedzirra/parser/itunes_rss.rb +50 -0
  15. data/lib/feedzirra/parser/itunes_rss_item.rb +32 -0
  16. data/lib/feedzirra/parser/itunes_rss_owner.rb +12 -0
  17. data/lib/feedzirra/parser/rss.rb +22 -0
  18. data/lib/feedzirra/parser/rss_entry.rb +34 -0
  19. data/lib/feedzirra/parser.rb +15 -0
  20. data/lib/feedzirra/version.rb +3 -0
  21. data/lib/feedzirra.rb +19 -0
  22. data/spec/benchmarks/feed_benchmarks.rb +98 -0
  23. data/spec/benchmarks/feedzirra_benchmarks.rb +40 -0
  24. data/spec/benchmarks/fetching_benchmarks.rb +28 -0
  25. data/spec/benchmarks/parsing_benchmark.rb +30 -0
  26. data/spec/benchmarks/updating_benchmarks.rb +33 -0
  27. data/spec/feedzirra/feed_entry_utilities_spec.rb +52 -0
  28. data/spec/feedzirra/feed_spec.rb +579 -0
  29. data/spec/feedzirra/feed_utilities_spec.rb +152 -0
  30. data/spec/feedzirra/parser/atom_entry_spec.rb +86 -0
  31. data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +47 -0
  32. data/spec/feedzirra/parser/atom_feed_burner_spec.rb +43 -0
  33. data/spec/feedzirra/parser/atom_spec.rb +47 -0
  34. data/spec/feedzirra/parser/itunes_rss_item_spec.rb +48 -0
  35. data/spec/feedzirra/parser/itunes_rss_owner_spec.rb +18 -0
  36. data/spec/feedzirra/parser/itunes_rss_spec.rb +50 -0
  37. data/spec/feedzirra/parser/rss_entry_spec.rb +85 -0
  38. data/spec/feedzirra/parser/rss_spec.rb +45 -0
  39. data/spec/sample_feeds/run_against_sample.rb +20 -0
  40. data/spec/spec_helper.rb +66 -0
  41. metadata +221 -0
@@ -0,0 +1,152 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+ require 'rubygems'
3
+ require 'active_support'
4
+
5
+ describe Feedzirra::FeedUtilities do
6
+ before(:each) do
7
+ @klass = Class.new do
8
+ include Feedzirra::FeedUtilities
9
+ end
10
+ end
11
+
12
+ describe "instance methods" do
13
+ it "should provide an updated? accessor" do
14
+ feed = @klass.new
15
+ feed.should_not be_updated
16
+ feed.updated = true
17
+ feed.should be_updated
18
+ end
19
+
20
+ it "should provide a new_entries accessor" do
21
+ feed = @klass.new
22
+ feed.new_entries.should == []
23
+ feed.new_entries = [:foo]
24
+ feed.new_entries.should == [:foo]
25
+ end
26
+
27
+ it "should provide an etag accessor" do
28
+ feed = @klass.new
29
+ feed.etag = "foo"
30
+ feed.etag.should == "foo"
31
+ end
32
+
33
+ it "should provide a last_modified accessor" do
34
+ feed = @klass.new
35
+ time = Time.now
36
+ feed.last_modified = time
37
+ feed.last_modified.should == time
38
+ feed.last_modified.class.should == Time
39
+ end
40
+
41
+ it "should return new_entries? as true when entries are put into new_entries" do
42
+ feed = @klass.new
43
+ feed.new_entries << :foo
44
+ feed.should have_new_entries
45
+ end
46
+
47
+ it "should return a last_modified value from the entry with the most recent published date if the last_modified date hasn't been set" do
48
+ feed = Feedzirra::Parser::Atom.new
49
+ entry =Feedzirra::Parser::AtomEntry.new
50
+ entry.published = Time.now.to_s
51
+ feed.entries << entry
52
+ feed.last_modified.should == entry.published
53
+ end
54
+
55
+ it "should not throw an error if one of the entries has published date of nil" do
56
+ feed = Feedzirra::Parser::Atom.new
57
+ entry = Feedzirra::Parser::AtomEntry.new
58
+ entry.published = Time.now.to_s
59
+ feed.entries << entry
60
+ feed.entries << Feedzirra::Parser::AtomEntry.new
61
+ feed.last_modified.should == entry.published
62
+ end
63
+ end
64
+
65
+ describe "#update_from_feed" do
66
+ describe "updating feed attributes" do
67
+ before(:each) do
68
+ # I'm using the Atom class when I know I should be using a different one. However, this update_from_feed
69
+ # method would only be called against a feed item.
70
+ @feed = Feedzirra::Parser::Atom.new
71
+ @feed.title = "A title"
72
+ @feed.url = "http://pauldix.net"
73
+ @feed.feed_url = "http://feeds.feedburner.com/PaulDixExplainsNothing"
74
+ @feed.updated = false
75
+ @updated_feed = @feed.dup
76
+ end
77
+
78
+ it "should update the title if changed" do
79
+ @updated_feed.title = "new title"
80
+ @feed.update_from_feed(@updated_feed)
81
+ @feed.title.should == @updated_feed.title
82
+ @feed.should be_updated
83
+ end
84
+
85
+ it "should not update the title if the same" do
86
+ @feed.update_from_feed(@updated_feed)
87
+ @feed.should_not be_updated
88
+ end
89
+
90
+ it "should update the feed_url if changed" do
91
+ @updated_feed.feed_url = "a new feed url"
92
+ @feed.update_from_feed(@updated_feed)
93
+ @feed.feed_url.should == @updated_feed.feed_url
94
+ @feed.should be_updated
95
+ end
96
+
97
+ it "should not update the feed_url if the same" do
98
+ @feed.update_from_feed(@updated_feed)
99
+ @feed.should_not be_updated
100
+ end
101
+
102
+ it "should update the url if changed" do
103
+ @updated_feed.url = "a new url"
104
+ @feed.update_from_feed(@updated_feed)
105
+ @feed.url.should == @updated_feed.url
106
+ end
107
+
108
+ it "should not update the url if not changed" do
109
+ @feed.update_from_feed(@updated_feed)
110
+ @feed.should_not be_updated
111
+ end
112
+ end
113
+
114
+ describe "updating entries" do
115
+ before(:each) do
116
+ # I'm using the Atom class when I know I should be using a different one. However, this update_from_feed
117
+ # method would only be called against a feed item.
118
+ @feed = Feedzirra::Parser::Atom.new
119
+ @feed.title = "A title"
120
+ @feed.url = "http://pauldix.net"
121
+ @feed.feed_url = "http://feeds.feedburner.com/PaulDixExplainsNothing"
122
+ @feed.updated = false
123
+ @updated_feed = @feed.dup
124
+ @old_entry = Feedzirra::Parser::AtomEntry.new
125
+ @old_entry.url = "http://pauldix.net/old.html"
126
+ @old_entry.published = Time.now.to_s
127
+ @new_entry = Feedzirra::Parser::AtomEntry.new
128
+ @new_entry.url = "http://pauldix.net/new.html"
129
+ @new_entry.published = (Time.now + 10).to_s
130
+ @feed.entries << @old_entry
131
+ @updated_feed.entries << @new_entry
132
+ @updated_feed.entries << @old_entry
133
+ end
134
+
135
+ it "should update last-modified from the latest entry date" do
136
+ @feed.update_from_feed(@updated_feed)
137
+ @feed.last_modified.should == @new_entry.published
138
+ end
139
+
140
+ it "should put new entries into new_entries" do
141
+ @feed.update_from_feed(@updated_feed)
142
+ @feed.new_entries.should == [@new_entry]
143
+ end
144
+
145
+ it "should also put new entries into the entries collection" do
146
+ @feed.update_from_feed(@updated_feed)
147
+ @feed.entries.should include(@new_entry)
148
+ @feed.entries.should include(@old_entry)
149
+ end
150
+ end
151
+ end
152
+ end
@@ -0,0 +1,86 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::AtomEntry do
4
+ before(:each) do
5
+ # I don't really like doing it this way because these unit tests should only rely on AtomEntry,
6
+ # but this is actually how it should work. You would never just pass entry XML straight to AtomEntry.
7
+ @entry = Feedzirra::Parser::Atom.parse(sample_atom_feed).entries.first
8
+ end
9
+
10
+ it "should parse the title" do
11
+ @entry.title.should == "AWS Job: Architect & Designer Position in Turkey"
12
+ end
13
+
14
+ it "should parse the url" do
15
+ @entry.url.should == "http://aws.typepad.com/aws/2009/01/aws-job-architect-designer-position-in-turkey.html"
16
+ end
17
+
18
+ it "should parse the url even when" do
19
+ Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).entries.first.url.should == "http://www.innoq.com/blog/phaus/2009/07/ja.html"
20
+ end
21
+
22
+ it "should parse the author" do
23
+ @entry.author.should == "AWS Editor"
24
+ end
25
+
26
+ it "should parse the content" do
27
+ @entry.content.should == sample_atom_entry_content
28
+ end
29
+
30
+ it "should provide a summary" do
31
+ @entry.summary.should == "Late last year an entrepreneur from Turkey visited me at Amazon HQ in Seattle. We talked about his plans to use AWS as part of his new social video portal startup. I won't spill any beans before he's ready to..."
32
+ end
33
+
34
+ it "should parse the published date" do
35
+ @entry.published.to_s.should == "Fri Jan 16 18:21:00 UTC 2009"
36
+ end
37
+
38
+ it "should parse the categories" do
39
+ @entry.categories.should == ['Turkey', 'Seattle']
40
+ end
41
+
42
+ it "should parse the updated date" do
43
+ @entry.updated.to_s.should == "Fri Jan 16 18:21:00 UTC 2009"
44
+ end
45
+
46
+ it "should parse the id" do
47
+ @entry.id.should == "tag:typepad.com,2003:post-61484736"
48
+ end
49
+
50
+ it "should support each" do
51
+ @entry.respond_to? :each
52
+ end
53
+
54
+ it "should be able to list out all fields with each" do
55
+ all_fields = []
56
+ @entry.each do |field, value|
57
+ all_fields << field
58
+ end
59
+ all_fields.sort == ['author', 'categories', 'content', 'id', 'published', 'summary', 'title', 'url'] # NOTE(review): comparison result is discarded (no .should), so nothing is asserted; confirm the expected field list before turning this into an expectation
60
+ end
61
+
62
+ it "should be able to list out all values with each" do
63
+ title_value = ''
64
+ @entry.each do |field, value|
65
+ title_value = value if field == 'title'
66
+ end
67
+ title_value.should == "AWS Job: Architect & Designer Position in Turkey"
68
+ end
69
+
70
+ it "should support checking if a field exists in the entry" do
71
+ @entry.include?('title') && @entry.include?('author')
72
+ end
73
+
74
+ it "should allow access to fields with hash syntax" do
75
+ @entry['title'] == @entry.title
76
+ @entry['title'].should == "AWS Job: Architect & Designer Position in Turkey"
77
+ @entry['author'] == @entry.author
78
+ @entry['author'].should == "AWS Editor"
79
+ end
80
+
81
+ it "should allow setting field values with hash syntax" do
82
+ @entry['title'] = "Foobar"
83
+ @entry.title.should == "Foobar"
84
+ end
85
+
86
+ end
@@ -0,0 +1,47 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::AtomFeedBurnerEntry do
4
+ before(:each) do
5
+ # I don't really like doing it this way because these unit tests should only rely on AtomFeedBurnerEntry,
6
+ # but this is actually how it should work. You would never just pass entry XML straight to AtomFeedBurnerEntry.
7
+ @entry = Feedzirra::Parser::AtomFeedBurner.parse(sample_feedburner_atom_feed).entries.first
8
+ end
9
+
10
+ it "should parse the title" do
11
+ @entry.title.should == "Making a Ruby C library even faster"
12
+ end
13
+
14
+ it "should be able to fetch a url via the 'alternate' rel if no origLink exists" do
15
+ entry = Feedzirra::Parser::AtomFeedBurner.parse(File.read("#{File.dirname(__FILE__)}/../../sample_feeds/PaulDixExplainsNothingAlternate.xml")).entries.first
16
+ entry.url.should == 'http://feeds.feedburner.com/~r/PaulDixExplainsNothing/~3/519925023/making-a-ruby-c-library-even-faster.html'
17
+ end
18
+
19
+ it "should parse the url" do
20
+ @entry.url.should == "http://www.pauldix.net/2009/01/making-a-ruby-c-library-even-faster.html"
21
+ end
22
+
23
+ it "should parse the url when there is no alternate" do
24
+ entry = Feedzirra::Parser::AtomFeedBurner.parse(File.read("#{File.dirname(__FILE__)}/../../sample_feeds/FeedBurnerUrlNoAlternate.xml")).entries.first
25
+ entry.url.should == 'http://example.com/QQQQ.html'
26
+ end
27
+
28
+ it "should parse the author" do
29
+ @entry.author.should == "Paul Dix"
30
+ end
31
+
32
+ it "should parse the content" do
33
+ @entry.content.should == sample_feedburner_atom_entry_content
34
+ end
35
+
36
+ it "should provide a summary" do
37
+ @entry.summary.should == "Last week I released the first version of a SAX based XML parsing library called SAX-Machine. It uses Nokogiri, which uses libxml, so it's pretty fast. However, I felt that it could be even faster. The only question was how..."
38
+ end
39
+
40
+ it "should parse the published date" do
41
+ @entry.published.to_s.should == "Thu Jan 22 15:50:22 UTC 2009"
42
+ end
43
+
44
+ it "should parse the categories" do
45
+ @entry.categories.should == ['Ruby', 'Another Category']
46
+ end
47
+ end
@@ -0,0 +1,43 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::AtomFeedBurner do
4
+ describe "#will_parse?" do
5
+ it "should return true for a feedburner atom feed" do
6
+ Feedzirra::Parser::AtomFeedBurner.should be_able_to_parse(sample_feedburner_atom_feed)
7
+ end
8
+
9
+ it "should return false for an rdf feed" do
10
+ Feedzirra::Parser::AtomFeedBurner.should_not be_able_to_parse(sample_rdf_feed)
11
+ end
12
+
13
+ it "should return false for a regular atom feed" do
14
+ Feedzirra::Parser::AtomFeedBurner.should_not be_able_to_parse(sample_atom_feed)
15
+ end
16
+ end
17
+
18
+ describe "parsing" do
19
+ before(:each) do
20
+ @feed = Feedzirra::Parser::AtomFeedBurner.parse(sample_feedburner_atom_feed)
21
+ end
22
+
23
+ it "should parse the title" do
24
+ @feed.title.should == "Paul Dix Explains Nothing"
25
+ end
26
+
27
+ it "should parse the description" do
28
+ @feed.description.should == "Entrepreneurship, programming, software development, politics, NYC, and random thoughts."
29
+ end
30
+
31
+ it "should parse the url" do
32
+ @feed.url.should == "http://www.pauldix.net/"
33
+ end
34
+
35
+ it "should parse the feed_url" do
36
+ @feed.feed_url.should == "http://feeds.feedburner.com/PaulDixExplainsNothing"
37
+ end
38
+
39
+ it "should parse entries" do
40
+ @feed.entries.size.should == 5
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,47 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::Atom do
4
+ describe "#will_parse?" do
5
+ it "should return true for an atom feed" do
6
+ Feedzirra::Parser::Atom.should be_able_to_parse(sample_atom_feed)
7
+ end
8
+
9
+ it "should return false for an rdf feed" do
10
+ Feedzirra::Parser::Atom.should_not be_able_to_parse(sample_rdf_feed)
11
+ end
12
+ end
13
+
14
+ describe "parsing" do
15
+ before(:each) do
16
+ @feed = Feedzirra::Parser::Atom.parse(sample_atom_feed)
17
+ end
18
+
19
+ it "should parse the title" do
20
+ @feed.title.should == "Amazon Web Services Blog"
21
+ end
22
+
23
+ it "should parse the description" do
24
+ @feed.description.should == "Amazon Web Services, Products, Tools, and Developer Information..."
25
+ end
26
+
27
+ it "should parse the url" do
28
+ @feed.url.should == "http://aws.typepad.com/aws/"
29
+ end
30
+
31
+ it "should parse the url even when it doesn't have the type='text/html' attribute" do
32
+ Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).url.should == "http://www.innoq.com/planet/"
33
+ end
34
+
35
+ it "should parse the feed_url even when it doesn't have the type='application/atom+xml' attribute" do
36
+ Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).feed_url.should == "http://www.innoq.com/planet/atom.xml"
37
+ end
38
+
39
+ it "should parse the feed_url" do
40
+ @feed.feed_url.should == "http://aws.typepad.com/aws/atom.xml"
41
+ end
42
+
43
+ it "should parse entries" do
44
+ @feed.entries.size.should == 10
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,48 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::ITunesRSSItem do
4
+ before(:each) do
5
+ # I don't really like doing it this way because these unit tests should only rely on ITunesRSSItem,
6
+ # but this is actually how it should work. You would never just pass entry XML straight to ITunesRSSItem.
7
+ @item = Feedzirra::Parser::ITunesRSS.parse(sample_itunes_feed).entries.first
8
+ end
9
+
10
+ it "should parse the title" do
11
+ @item.title.should == "Shake Shake Shake Your Spices"
12
+ end
13
+
14
+ it "should parse the author" do
15
+ @item.itunes_author.should == "John Doe"
16
+ end
17
+
18
+ it "should parse the subtitle" do
19
+ @item.itunes_subtitle.should == "A short primer on table spices"
20
+ end
21
+
22
+ it "should parse the summary" do
23
+ @item.itunes_summary.should == "This week we talk about salt and pepper shakers, comparing and contrasting pour rates, construction materials, and overall aesthetics. Come and join the party!"
24
+ end
25
+
26
+ it "should parse the enclosure" do
27
+ @item.enclosure_length.should == "8727310"
28
+ @item.enclosure_type.should == "audio/x-m4a"
29
+ @item.enclosure_url.should == "http://example.com/podcasts/everything/AllAboutEverythingEpisode3.m4a"
30
+ end
31
+
32
+ it "should parse the guid" do
33
+ @item.guid.should == "http://example.com/podcasts/archive/aae20050615.m4a"
34
+ end
35
+
36
+ it "should parse the published date" do
37
+ @item.published.to_s.should == "Wed Jun 15 19:00:00 UTC 2005"
38
+ end
39
+
40
+ it "should parse the duration" do
41
+ @item.itunes_duration.should == "7:04"
42
+ end
43
+
44
+ it "should parse the keywords" do
45
+ @item.itunes_keywords.should == "salt, pepper, shaker, exciting"
46
+ end
47
+
48
+ end
@@ -0,0 +1,18 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::ITunesRSSOwner do
4
+ before(:each) do
5
+ # I don't really like doing it this way because these unit tests should only rely on ITunesRSSOwner,
6
+ # but this is actually how it should work. You would never just pass owner XML straight to ITunesRSSOwner.
7
+ @owner = Feedzirra::Parser::ITunesRSS.parse(sample_itunes_feed).itunes_owners.first
8
+ end
9
+
10
+ it "should parse the name" do
11
+ @owner.name.should == "John Doe"
12
+ end
13
+
14
+ it "should parse the email" do
15
+ @owner.email.should == "john.doe@example.com"
16
+ end
17
+
18
+ end
@@ -0,0 +1,50 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::ITunesRSS do
4
+ describe "#will_parse?" do
5
+ it "should return true for an itunes RSS feed" do
6
+ Feedzirra::Parser::ITunesRSS.should be_able_to_parse(sample_itunes_feed)
7
+ end
8
+
9
+ it "should return fase for an atom feed" do
10
+ Feedzirra::Parser::ITunesRSS.should_not be_able_to_parse(sample_atom_feed)
11
+ end
12
+ end
13
+
14
+ describe "parsing" do
15
+ before(:each) do
16
+ @feed = Feedzirra::Parser::ITunesRSS.parse(sample_itunes_feed)
17
+ end
18
+
19
+ it "should parse the subtitle" do
20
+ @feed.itunes_subtitle.should == "A show about everything"
21
+ end
22
+
23
+ it "should parse the author" do
24
+ @feed.itunes_author.should == "John Doe"
25
+ end
26
+
27
+ it "should parse an owner" do
28
+ @feed.itunes_owners.size.should == 1
29
+ end
30
+
31
+ it "should parse an image" do
32
+ @feed.itunes_image.should == "http://example.com/podcasts/everything/AllAboutEverything.jpg"
33
+ end
34
+
35
+ it "should parse categories" do # NOTE(review): none of the comparisons below use .should, so this example asserts nothing; confirm the expected values (including whether the parser returns "TV &amp; Film" escaped) before converting them to expectations
36
+ @feed.itunes_categories.size == 3
37
+ @feed.itunes_categories[0] == "Technology"
38
+ @feed.itunes_categories[1] == "Gadgets"
39
+ @feed.itunes_categories[2] == "TV &amp; Film"
40
+ end
41
+
42
+ it "should parse the summary" do
43
+ @feed.itunes_summary.should == "All About Everything is a show about everything. Each week we dive into any subject known to man and talk about it as much as we can. Look for our Podcast in the iTunes Music Store"
44
+ end
45
+
46
+ it "should parse entries" do
47
+ @feed.entries.size.should == 3
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,85 @@
1
+ # coding: utf-8
2
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
3
+
4
+ describe Feedzirra::Parser::RSSEntry do
5
+ before(:each) do
6
+ # I don't really like doing it this way because these unit tests should only rely on RSSEntry,
7
+ # but this is actually how it should work. You would never just pass entry XML straight to RSSEntry.
8
+ @entry = Feedzirra::Parser::RSS.parse(sample_rss_feed).entries.first
9
+ end
10
+
11
+ after(:each) do
12
+ # We change the title in one or more specs to test []=
13
+ if @entry.title != "Nokogiri’s Slop Feature"
14
+ @entry.title = Feedzirra::Parser::RSS.parse(sample_rss_feed).entries.first.title
15
+ end
16
+ end
17
+
18
+ it "should parse the title" do
19
+ @entry.title.should == "Nokogiri’s Slop Feature"
20
+ end
21
+
22
+ it "should parse the url" do
23
+ @entry.url.should == "http://tenderlovemaking.com/2008/12/04/nokogiris-slop-feature/"
24
+ end
25
+
26
+ it "should parse the author" do
27
+ @entry.author.should == "Aaron Patterson"
28
+ end
29
+
30
+ it "should parse the content" do
31
+ @entry.content.should == sample_rss_entry_content
32
+ end
33
+
34
+ it "should provide a summary" do
35
+ @entry.summary.should == "Oops! When I released nokogiri version 1.0.7, I totally forgot to talk about Nokogiri::Slop() feature that was added. Why is it called \"slop\"? It lets you sloppily explore documents. Basically, it decorates your document with method_missing() that allows you to search your document via method calls.\nGiven this document:\n\ndoc = Nokogiri::Slop&#40;&#60;&#60;-eohtml&#41;\n&#60;html&#62;\n&#160; &#60;body&#62;\n&#160; [...]"
36
+ end
37
+
38
+ it "should parse the published date" do
39
+ @entry.published.to_s.should == "Thu Dec 04 17:17:49 UTC 2008"
40
+ end
41
+
42
+ it "should parse the categories" do
43
+ @entry.categories.should == ['computadora', 'nokogiri', 'rails']
44
+ end
45
+
46
+ it "should parse the guid as id" do
47
+ @entry.id.should == "http://tenderlovemaking.com/?p=198"
48
+ end
49
+
50
+ it "should support each" do
51
+ @entry.respond_to? :each
52
+ end
53
+
54
+ it "should be able to list out all fields with each" do
55
+ all_fields = []
56
+ @entry.each do |field, value|
57
+ all_fields << field
58
+ end
59
+ all_fields.sort == ['author', 'categories', 'content', 'id', 'published', 'summary', 'title', 'url'] # NOTE(review): comparison result is discarded (no .should), so nothing is asserted; confirm the expected field list before turning this into an expectation
60
+ end
61
+
62
+ it "should be able to list out all values with each" do
63
+ title_value = ''
64
+ @entry.each do |field, value|
65
+ title_value = value if field == 'title'
66
+ end
67
+ title_value.should == "Nokogiri’s Slop Feature"
68
+ end
69
+
70
+ it "should support checking if a field exists in the entry" do
71
+ @entry.include?('title') && @entry.include?('author')
72
+ end
73
+
74
+ it "should allow access to fields with hash syntax" do
75
+ @entry['title'] == @entry.title
76
+ @entry['title'].should == "Nokogiri’s Slop Feature"
77
+ @entry['author'] == @entry.author
78
+ @entry['author'].should == "Aaron Patterson"
79
+ end
80
+
81
+ it "should allow setting field values with hash syntax" do
82
+ @entry['title'] = "Foobar"
83
+ @entry.title.should == "Foobar"
84
+ end
85
+ end
@@ -0,0 +1,45 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::RSS do
4
+ describe "#will_parse?" do
5
+ it "should return true for an RSS feed" do
6
+ Feedzirra::Parser::RSS.should be_able_to_parse(sample_rss_feed)
7
+ end
8
+
9
+ # this is no longer true. combined rdf and rss into one
10
+ # it "should return false for an rdf feed" do
11
+ # Feedzirra::RSS.should_not be_able_to_parse(sample_rdf_feed)
12
+ # end
13
+
14
+ it "should return fase for an atom feed" do
15
+ Feedzirra::Parser::RSS.should_not be_able_to_parse(sample_atom_feed)
16
+ end
17
+ end
18
+
19
+ describe "parsing" do
20
+ before(:each) do
21
+ @feed = Feedzirra::Parser::RSS.parse(sample_rss_feed)
22
+ end
23
+
24
+ it "should parse the title" do
25
+ @feed.title.should == "Tender Lovemaking"
26
+ end
27
+
28
+ it "should parse the description" do
29
+ @feed.description.should == "The act of making love, tenderly."
30
+ end
31
+
32
+ it "should parse the url" do
33
+ @feed.url.should == "http://tenderlovemaking.com"
34
+ end
35
+
36
+ it "should provide an accessor for the feed_url" do
37
+ @feed.respond_to?(:feed_url).should == true
38
+ @feed.respond_to?(:feed_url=).should == true
39
+ end
40
+
41
+ it "should parse entries" do
42
+ @feed.entries.size.should == 10
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,20 @@
1
+ require 'rubygems'
2
+ require File.dirname(__FILE__) + "/../../lib/feedzirra.rb"
3
+
4
+ feed_urls = File.readlines(File.dirname(__FILE__) + "/top5kfeeds.dat").collect {|line| line.split.first}
5
+
6
+ success = lambda do |url, feed|
7
+ puts "SUCCESS - #{feed.title} - #{url}"
8
+ end
9
+
10
+ failed_feeds = []
11
+ failure = lambda do |url, response_code, header, body|
12
+ failed_feeds << url if response_code == 200
13
+ puts "*********** FAILED with #{response_code} on #{url}"
14
+ end
15
+
16
+ Feedzirra::Feed.fetch_and_parse(feed_urls, :on_success => success, :on_failure => failure)
17
+
18
+ File.open("./failed_urls.txt", "w") do |f|
19
+ f.write failed_feeds.join("\n")
20
+ end