jashmenn-feedzirra 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. data/.rspec +1 -0
  2. data/README.rdoc +177 -0
  3. data/Rakefile +51 -0
  4. data/lib/feedzirra.rb +20 -0
  5. data/lib/feedzirra/core_ext.rb +3 -0
  6. data/lib/feedzirra/core_ext/date.rb +19 -0
  7. data/lib/feedzirra/core_ext/string.rb +9 -0
  8. data/lib/feedzirra/feed.rb +384 -0
  9. data/lib/feedzirra/feed_entry_utilities.rb +65 -0
  10. data/lib/feedzirra/feed_utilities.rb +61 -0
  11. data/lib/feedzirra/parser.rb +20 -0
  12. data/lib/feedzirra/parser/atom.rb +29 -0
  13. data/lib/feedzirra/parser/atom_entry.rb +30 -0
  14. data/lib/feedzirra/parser/atom_feed_burner.rb +21 -0
  15. data/lib/feedzirra/parser/atom_feed_burner_entry.rb +31 -0
  16. data/lib/feedzirra/parser/google_docs_atom.rb +28 -0
  17. data/lib/feedzirra/parser/google_docs_atom_entry.rb +29 -0
  18. data/lib/feedzirra/parser/itunes_rss.rb +50 -0
  19. data/lib/feedzirra/parser/itunes_rss_item.rb +32 -0
  20. data/lib/feedzirra/parser/itunes_rss_owner.rb +12 -0
  21. data/lib/feedzirra/parser/rss.rb +22 -0
  22. data/lib/feedzirra/parser/rss_entry.rb +34 -0
  23. data/lib/feedzirra/parser/rss_feed_burner.rb +22 -0
  24. data/lib/feedzirra/parser/rss_feed_burner_entry.rb +40 -0
  25. data/lib/feedzirra/version.rb +3 -0
  26. data/spec/benchmarks/feed_benchmarks.rb +98 -0
  27. data/spec/benchmarks/feedzirra_benchmarks.rb +40 -0
  28. data/spec/benchmarks/fetching_benchmarks.rb +28 -0
  29. data/spec/benchmarks/parsing_benchmark.rb +30 -0
  30. data/spec/benchmarks/updating_benchmarks.rb +33 -0
  31. data/spec/feedzirra/feed_entry_utilities_spec.rb +52 -0
  32. data/spec/feedzirra/feed_spec.rb +597 -0
  33. data/spec/feedzirra/feed_utilities_spec.rb +152 -0
  34. data/spec/feedzirra/parser/atom_entry_spec.rb +86 -0
  35. data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +47 -0
  36. data/spec/feedzirra/parser/atom_feed_burner_spec.rb +47 -0
  37. data/spec/feedzirra/parser/atom_spec.rb +51 -0
  38. data/spec/feedzirra/parser/google_docs_atom_entry_spec.rb +22 -0
  39. data/spec/feedzirra/parser/google_docs_atom_spec.rb +31 -0
  40. data/spec/feedzirra/parser/itunes_rss_item_spec.rb +48 -0
  41. data/spec/feedzirra/parser/itunes_rss_owner_spec.rb +18 -0
  42. data/spec/feedzirra/parser/itunes_rss_spec.rb +54 -0
  43. data/spec/feedzirra/parser/rss_entry_spec.rb +85 -0
  44. data/spec/feedzirra/parser/rss_feed_burner_entry_spec.rb +85 -0
  45. data/spec/feedzirra/parser/rss_feed_burner_spec.rb +52 -0
  46. data/spec/feedzirra/parser/rss_spec.rb +49 -0
  47. data/spec/sample_feeds/run_against_sample.rb +20 -0
  48. data/spec/spec_helper.rb +78 -0
  49. metadata +228 -0
@@ -0,0 +1,18 @@
require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])

# Specs for the owner element exposed by a parsed iTunes feed.
describe Feedzirra::Parser::ITunesRSSOwner do
  # The owner is obtained through a full ITunesRSS parse rather than by
  # handing owner XML straight to ITunesRSSOwner. That mirrors how the class
  # is really used, at the cost of these examples also depending on the
  # ITunesRSS parser.
  before(:each) do
    parsed_feed = Feedzirra::Parser::ITunesRSS.parse(sample_itunes_feed)
    @owner = parsed_feed.itunes_owners.first
  end

  it("should parse the name") { @owner.name.should == "John Doe" }

  it("should parse the email") { @owner.email.should == "john.doe@example.com" }
end
@@ -0,0 +1,54 @@
require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])

# Specs for the iTunes-flavoured RSS parser: which feeds it claims to be able
# to parse, and the feed-level itunes attributes it extracts.
describe Feedzirra::Parser::ITunesRSS do
  describe "#will_parse?" do
    it "should return true for an itunes RSS feed" do
      Feedzirra::Parser::ITunesRSS.should be_able_to_parse(sample_itunes_feed)
    end

    it "should return false for an atom feed" do
      Feedzirra::Parser::ITunesRSS.should_not be_able_to_parse(sample_atom_feed)
    end

    it "should return false for an rss feedburner feed" do
      Feedzirra::Parser::ITunesRSS.should_not be_able_to_parse(sample_rss_feed_burner_feed)
    end
  end

  describe "parsing" do
    before(:each) do
      @feed = Feedzirra::Parser::ITunesRSS.parse(sample_itunes_feed)
    end

    it "should parse the subtitle" do
      @feed.itunes_subtitle.should == "A show about everything"
    end

    it "should parse the author" do
      @feed.itunes_author.should == "John Doe"
    end

    it "should parse an owner" do
      @feed.itunes_owners.size.should == 1
    end

    it "should parse an image" do
      @feed.itunes_image.should == "http://example.com/podcasts/everything/AllAboutEverything.jpg"
    end

    it "should parse categories" do
      # The size and element checks here used to be bare `==` expressions whose
      # results were discarded, so this example could never fail. They are now
      # a single real expectation covering both the count and the order.
      # NOTE(review): never enforced before; confirm against the itunes fixture.
      @feed.itunes_categories.should == ["Technology", "Gadgets", "TV & Film"]
    end

    it "should parse the summary" do
      @feed.itunes_summary.should == "All About Everything is a show about everything. Each week we dive into any subject known to man and talk about it as much as we can. Look for our Podcast in the iTunes Music Store"
    end

    it "should parse entries" do
      @feed.entries.size.should == 3
    end
  end
end
@@ -0,0 +1,85 @@
# coding: utf-8
require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])

# Specs for a single RSS entry: parsed attributes plus the each / include? /
# [] / []= access helpers.
describe Feedzirra::Parser::RSSEntry do
  before(:each) do
    # The entry is taken from a full RSS parse rather than by handing entry
    # XML straight to RSSEntry. That is how the class is really used, even
    # though it makes these examples depend on the RSS parser as well.
    @entry = Feedzirra::Parser::RSS.parse(sample_rss_feed).entries.first
  end

  after(:each) do
    # We change the title in one or more specs to test []=
    if @entry.title != "Nokogiri’s Slop Feature"
      @entry.title = Feedzirra::Parser::RSS.parse(sample_rss_feed).entries.first.title
    end
  end

  it "should parse the title" do
    @entry.title.should == "Nokogiri’s Slop Feature"
  end

  it "should parse the url" do
    @entry.url.should == "http://tenderlovemaking.com/2008/12/04/nokogiris-slop-feature/"
  end

  it "should parse the author" do
    @entry.author.should == "Aaron Patterson"
  end

  it "should parse the content" do
    @entry.content.should == sample_rss_entry_content
  end

  it "should provide a summary" do
    @entry.summary.should == "Oops! When I released nokogiri version 1.0.7, I totally forgot to talk about Nokogiri::Slop() feature that was added. Why is it called \"slop\"? It lets you sloppily explore documents. Basically, it decorates your document with method_missing() that allows you to search your document via method calls.\nGiven this document:\n\ndoc = Nokogiri::Slop(<<-eohtml)\n<html>\n  <body>\n  [...]"
  end

  it "should parse the published date" do
    @entry.published.to_s.should == "Thu Dec 04 17:17:49 UTC 2008"
  end

  it "should parse the categories" do
    @entry.categories.should == ['computadora', 'nokogiri', 'rails']
  end

  it "should parse the guid as id" do
    @entry.id.should == "http://tenderlovemaking.com/?p=198"
  end

  it "should support each" do
    # Previously a bare `respond_to?` call whose result was ignored.
    @entry.should respond_to(:each)
  end

  it "should be able to list out all fields with each" do
    all_fields = []
    @entry.each do |field, _value|
      all_fields << field
    end
    # NOTE(review): this comparison used to be a bare `==` whose result was
    # thrown away, so the expected list has never actually been checked. If it
    # fails now, confirm which field names #each yields (for instance whether
    # the guid is reported as 'id') and update the list accordingly.
    all_fields.sort.should == ['author', 'categories', 'content', 'id', 'published', 'summary', 'title', 'url']
  end

  it "should be able to list out all values with each" do
    title_value = ''
    @entry.each do |field, value|
      title_value = value if field == 'title'
    end
    title_value.should == "Nokogiri’s Slop Feature"
  end

  it "should support checking if a field exists in the entry" do
    # The include matcher calls @entry.include? for each argument; before, the
    # boolean result of the two include? calls was computed and dropped.
    # NOTE(review): never enforced before; confirm include? takes field names.
    @entry.should include('title', 'author')
  end

  it "should allow access to fields with hash syntax" do
    # The accessor-vs-[] comparisons were bare `==` expressions; now asserted.
    @entry['title'].should == @entry.title
    @entry['title'].should == "Nokogiri’s Slop Feature"
    @entry['author'].should == @entry.author
    @entry['author'].should == "Aaron Patterson"
  end

  it "should allow setting field values with hash syntax" do
    @entry['title'] = "Foobar"
    @entry.title.should == "Foobar"
  end
end
@@ -0,0 +1,85 @@
# coding: utf-8
require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])

# Specs for a single FeedBurner RSS entry: parsed attributes plus the each /
# include? / [] / []= access helpers.
describe Feedzirra::Parser::RSSFeedBurnerEntry do
  before(:each) do
    # The entry is taken from a full RSSFeedBurner parse rather than by handing
    # entry XML straight to RSSFeedBurnerEntry. That is how the class is really
    # used, even though it makes these examples depend on the feed parser too.
    @entry = Feedzirra::Parser::RSSFeedBurner.parse(sample_rss_feed_burner_feed).entries.first
  end

  after(:each) do
    # We change the title in one or more specs to test []=
    if @entry.title != "Angie’s List Sets Price Range IPO At $11 To $13 Per Share; Valued At Over $600M"
      # Restore using the same parser the examples use. This hook previously
      # re-parsed the FeedBurner fixture with the plain RSS parser.
      @entry.title = Feedzirra::Parser::RSSFeedBurner.parse(sample_rss_feed_burner_feed).entries.first.title
    end
  end

  it "should parse the title" do
    @entry.title.should == "Angie’s List Sets Price Range IPO At $11 To $13 Per Share; Valued At Over $600M"
  end

  it "should parse the original url" do
    @entry.url.should == "http://techcrunch.com/2011/11/02/angies-list-prices-ipo-at-11-to-13-per-share-valued-at-over-600m/"
  end

  it "should parse the author" do
    @entry.author.should == "Leena Rao"
  end

  it "should parse the content" do
    @entry.content.should == sample_rss_feed_burner_entry_content
  end

  it "should provide a summary" do
    @entry.summary.should == sample_rss_feed_burner_entry_description
  end

  it "should parse the published date" do
    @entry.published.to_s.should == "Wed Nov 02 17:25:27 UTC 2011"
  end

  it "should parse the categories" do
    @entry.categories.should == ["TC", "angie\\'s list"]
  end

  it "should parse the guid as id" do
    @entry.id.should == "http://techcrunch.com/?p=446154"
  end

  it "should support each" do
    # Previously a bare `respond_to?` call whose result was ignored.
    @entry.should respond_to(:each)
  end

  it "should be able to list out all fields with each" do
    all_fields = []
    @entry.each do |field, _value|
      all_fields << field
    end
    # NOTE(review): this comparison used to be a bare `==` whose result was
    # thrown away, so the expected list has never actually been checked. If it
    # fails now, confirm which field names #each yields (for instance whether
    # the guid is reported as 'id') and update the list accordingly.
    all_fields.sort.should == ['author', 'categories', 'content', 'id', 'published', 'summary', 'title', 'url']
  end

  it "should be able to list out all values with each" do
    title_value = ''
    @entry.each do |field, value|
      title_value = value if field == 'title'
    end
    title_value.should == "Angie’s List Sets Price Range IPO At $11 To $13 Per Share; Valued At Over $600M"
  end

  it "should support checking if a field exists in the entry" do
    # The include matcher calls @entry.include? for each argument; before, the
    # boolean result of the two include? calls was computed and dropped.
    # NOTE(review): never enforced before; confirm include? takes field names.
    @entry.should include('title', 'author')
  end

  it "should allow access to fields with hash syntax" do
    # The accessor-vs-[] comparisons were bare `==` expressions; now asserted.
    @entry['title'].should == @entry.title
    @entry['title'].should == "Angie’s List Sets Price Range IPO At $11 To $13 Per Share; Valued At Over $600M"
    @entry['author'].should == @entry.author
    @entry['author'].should == "Leena Rao"
  end

  it "should allow setting field values with hash syntax" do
    @entry['title'] = "Foobar"
    @entry.title.should == "Foobar"
  end
end
@@ -0,0 +1,52 @@
require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])

# Specs for the FeedBurner RSS parser: feed detection and feed-level fields.
describe Feedzirra::Parser::RSSFeedBurner do
  describe "#will_parse?" do
    it "should return true for a feedburner rss feed" do
      described_class.should be_able_to_parse(sample_rss_feed_burner_feed)
    end

    it "should return false for a regular RSS feed" do
      described_class.should_not be_able_to_parse(sample_rss_feed)
    end

    it "should return false for a feedburner atom feed" do
      described_class.should_not be_able_to_parse(sample_feedburner_atom_feed)
    end

    it "should return false for an rdf feed" do
      described_class.should_not be_able_to_parse(sample_rdf_feed)
    end

    it "should return false for a regular atom feed" do
      described_class.should_not be_able_to_parse(sample_atom_feed)
    end
  end

  describe "parsing" do
    # Every example below works on a freshly parsed copy of the fixture.
    before(:each) do
      fixture_xml = sample_rss_feed_burner_feed
      @feed = described_class.parse(fixture_xml)
    end

    it "should parse the title" do
      @feed.title.should == "TechCrunch"
    end

    it "should parse the description" do
      @feed.description.should == "TechCrunch is a group-edited blog that profiles the companies, products and events defining and transforming the new web."
    end

    it "should parse the url" do
      @feed.url.should == "http://techcrunch.com"
    end

    it "should provide an accessor for the feed_url" do
      # Reader and writer must both be present.
      @feed.should respond_to(:feed_url)
      @feed.should respond_to(:feed_url=)
    end

    it "should parse entries" do
      @feed.entries.size.should == 20
    end
  end
end
@@ -0,0 +1,49 @@
require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])

# Specs for the plain RSS parser: feed detection and feed-level fields.
describe Feedzirra::Parser::RSS do
  describe "#will_parse?" do
    it "should return true for an RSS feed" do
      described_class.should be_able_to_parse(sample_rss_feed)
    end

    # this is no longer true. combined rdf and rss into one
    # it "should return false for an rdf feed" do
    #   Feedzirra::RSS.should_not be_able_to_parse(sample_rdf_feed)
    # end

    it "should return false for an atom feed" do
      described_class.should_not be_able_to_parse(sample_atom_feed)
    end

    it "should return false for an rss feedburner feed" do
      described_class.should_not be_able_to_parse(sample_rss_feed_burner_feed)
    end
  end

  describe "parsing" do
    # Every example below works on a freshly parsed copy of the fixture.
    before(:each) do
      fixture_xml = sample_rss_feed
      @feed = described_class.parse(fixture_xml)
    end

    it "should parse the title" do
      @feed.title.should == "Tender Lovemaking"
    end

    it "should parse the description" do
      @feed.description.should == "The act of making love, tenderly."
    end

    it "should parse the url" do
      @feed.url.should == "http://tenderlovemaking.com"
    end

    it "should provide an accessor for the feed_url" do
      # Reader and writer must both be present.
      @feed.should respond_to(:feed_url)
      @feed.should respond_to(:feed_url=)
    end

    it "should parse entries" do
      @feed.entries.size.should == 10
    end
  end
end
@@ -0,0 +1,20 @@
require 'rubygems'
require File.dirname(__FILE__) + "/../../lib/feedzirra.rb"

# Smoke-test script: fetches and parses every feed URL listed in
# top5kfeeds.dat (next to this file), prints one line per result, and writes
# the URLs that failed despite a 200 response to ./failed_urls.txt in the
# current working directory.

# The first whitespace-separated token of each line is taken as the URL.
# A blank line has no tokens, so split.first is nil; those are dropped so that
# nil is never handed to fetch_and_parse.
url_list_path = File.dirname(__FILE__) + "/top5kfeeds.dat"
first_tokens = File.readlines(url_list_path).map { |line| line.split.first }
feed_urls = first_tokens.compact

success = lambda do |url, feed|
  puts "SUCCESS - #{feed.title} - #{url}"
end

failed_feeds = []
failure = lambda do |url, response_code, _header, _body|
  # Only failures that still came back with HTTP 200 are recorded
  # (presumably feeds that were fetched but could not be parsed - verify).
  failed_feeds << url if response_code == 200
  puts "*********** FAILED with #{response_code} on #{url}"
end

Feedzirra::Feed.fetch_and_parse(feed_urls, :on_success => success, :on_failure => failure)

File.open("./failed_urls.txt", "w") do |f|
  f.write failed_feeds.join("\n")
end
@@ -0,0 +1,78 @@
1
+ require "rubygems"
2
+ require "rspec"
3
+
4
+ # gem install redgreen for colored test output
5
+ begin require "redgreen" unless ENV['TM_CURRENT_LINE']; rescue LoadError; end
6
+
7
+ path = File.expand_path(File.dirname(__FILE__) + "/../lib/")
8
+ $LOAD_PATH.unshift(path) unless $LOAD_PATH.include?(path)
9
+
10
+ require "feedzirra"
11
+
# Returns the full contents of the named fixture, read from the sample_feeds
# directory that sits next to this file.
def load_sample(filename)
  helper_dir = File.dirname(__FILE__)
  File.read("#{helper_dir}/sample_feeds/#{filename}")
end
15
+
# Fixture accessors. Each helper passes one fixed file name to load_sample
# and returns whatever load_sample returns.

# Atom
def sample_atom_feed; load_sample("AmazonWebServicesBlog.xml"); end

def sample_atom_entry_content; load_sample("AmazonWebServicesBlogFirstEntryContent.xml"); end

# iTunes
def sample_itunes_feed; load_sample("itunes.xml"); end

# RDF
def sample_rdf_feed; load_sample("HREFConsideredHarmful.xml"); end

def sample_rdf_entry_content; load_sample("HREFConsideredHarmfulFirstEntry.xml"); end

# RSS via FeedBurner
def sample_rss_feed_burner_feed; load_sample("TechCrunch.xml"); end

def sample_rss_feed_burner_entry_content; load_sample("TechCrunchFirstEntry.xml"); end

def sample_rss_feed_burner_entry_description; load_sample("TechCrunchFirstEntryDescription.xml"); end

# Plain RSS
def sample_rss_feed; load_sample("TenderLovemaking.xml"); end

def sample_rss_entry_content; load_sample("TenderLovemakingFirstEntry.xml"); end

# Atom via FeedBurner
def sample_feedburner_atom_feed; load_sample("PaulDixExplainsNothing.xml"); end

def sample_feedburner_atom_entry_content; load_sample("PaulDixExplainsNothingFirstEntryContent.xml"); end

# WFW
def sample_wfw_feed; load_sample("PaulDixExplainsNothingWFW.xml"); end

# Google Docs list
def sample_google_docs_list_feed; load_sample("GoogleDocsList.xml"); end
71
+
# http://eigenclass.org/hiki/Changes+in+Ruby+1.9#l156
# Ruby 1.9 changed what Time#to_s returns by default. The published-date
# specs compare against the older layout, so to_s is overridden here to
# produce it again (weekday, month, day, time, zone, year).
Time.class_eval do
  def to_s
    strftime("%a %b %d %H:%M:%S %Z %Y")
  end
end