seanwalbran-feedzirra 0.1.2

Files changed (45)
  1. data/.rspec +1 -0
  2. data/README.rdoc +177 -0
  3. data/Rakefile +51 -0
  4. data/lib/feedzirra.rb +19 -0
  5. data/lib/feedzirra/core_ext.rb +3 -0
  6. data/lib/feedzirra/core_ext/date.rb +19 -0
  7. data/lib/feedzirra/core_ext/string.rb +9 -0
  8. data/lib/feedzirra/feed.rb +383 -0
  9. data/lib/feedzirra/feed_entry_utilities.rb +65 -0
  10. data/lib/feedzirra/feed_utilities.rb +72 -0
  11. data/lib/feedzirra/parser.rb +17 -0
  12. data/lib/feedzirra/parser/atom.rb +29 -0
  13. data/lib/feedzirra/parser/atom_entry.rb +30 -0
  14. data/lib/feedzirra/parser/atom_feed_burner.rb +21 -0
  15. data/lib/feedzirra/parser/atom_feed_burner_entry.rb +31 -0
  16. data/lib/feedzirra/parser/itunes_rss.rb +50 -0
  17. data/lib/feedzirra/parser/itunes_rss_item.rb +32 -0
  18. data/lib/feedzirra/parser/itunes_rss_owner.rb +12 -0
  19. data/lib/feedzirra/parser/rss.rb +22 -0
  20. data/lib/feedzirra/parser/rss_entry.rb +34 -0
  21. data/lib/feedzirra/parser/rss_feed_burner.rb +22 -0
  22. data/lib/feedzirra/parser/rss_feed_burner_entry.rb +40 -0
  23. data/lib/feedzirra/version.rb +3 -0
  24. data/spec/benchmarks/feed_benchmarks.rb +98 -0
  25. data/spec/benchmarks/feedzirra_benchmarks.rb +40 -0
  26. data/spec/benchmarks/fetching_benchmarks.rb +28 -0
  27. data/spec/benchmarks/parsing_benchmark.rb +30 -0
  28. data/spec/benchmarks/updating_benchmarks.rb +33 -0
  29. data/spec/feedzirra/feed_entry_utilities_spec.rb +52 -0
  30. data/spec/feedzirra/feed_spec.rb +593 -0
  31. data/spec/feedzirra/feed_utilities_spec.rb +152 -0
  32. data/spec/feedzirra/parser/atom_entry_spec.rb +86 -0
  33. data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +47 -0
  34. data/spec/feedzirra/parser/atom_feed_burner_spec.rb +47 -0
  35. data/spec/feedzirra/parser/atom_spec.rb +51 -0
  36. data/spec/feedzirra/parser/itunes_rss_item_spec.rb +48 -0
  37. data/spec/feedzirra/parser/itunes_rss_owner_spec.rb +18 -0
  38. data/spec/feedzirra/parser/itunes_rss_spec.rb +54 -0
  39. data/spec/feedzirra/parser/rss_entry_spec.rb +85 -0
  40. data/spec/feedzirra/parser/rss_feed_burner_entry_spec.rb +85 -0
  41. data/spec/feedzirra/parser/rss_feed_burner_spec.rb +52 -0
  42. data/spec/feedzirra/parser/rss_spec.rb +49 -0
  43. data/spec/sample_feeds/run_against_sample.rb +20 -0
  44. data/spec/spec_helper.rb +74 -0
  45. metadata +287 -0
data/lib/feedzirra/parser/rss_feed_burner_entry.rb
@@ -0,0 +1,40 @@
+ module Feedzirra
+
+   module Parser
+     # Parser for dealing with RSS FeedBurner feed entries.
+     class RSSFeedBurnerEntry
+       include SAXMachine
+       include FeedEntryUtilities
+
+       element :title
+
+       element :"feedburner:origLink", :as => :url
+       element :link, :as => :url
+
+       element :"dc:creator", :as => :author
+       element :author, :as => :author
+       element :"content:encoded", :as => :content
+       element :description, :as => :summary
+
+       element :pubDate, :as => :published
+       element :pubdate, :as => :published
+       element :"dc:date", :as => :published
+       element :"dc:Date", :as => :published
+       element :"dcterms:created", :as => :published
+
+
+       element :"dcterms:modified", :as => :updated
+       element :issued, :as => :published
+       elements :category, :as => :categories
+
+       element :guid, :as => :entry_id
+
+       def url
+         @url || @link
+       end
+
+     end
+
+   end
+
+ end
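For reference, a minimal sketch (not part of the diff) of how this entry parser can be exercised on its own, assuming the gem is installed and using SAX Machine's class-level parse helper; the item XML below is invented for illustration. With this element order, the feedburner:origLink value is what ends up in url rather than the FeedProxy link.

require 'feedzirra'

item_xml = <<-XML
  <item xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
    <title>Example post</title>
    <link>http://feedproxy.example.com/proxied-link</link>
    <feedburner:origLink>http://example.com/original-link</feedburner:origLink>
    <pubDate>Wed, 28 Jan 2009 04:10:32 GMT</pubDate>
  </item>
XML

# SAX Machine classes expose a class-level parse method.
entry = Feedzirra::Parser::RSSFeedBurnerEntry.parse(item_xml)
entry.url       # => "http://example.com/original-link"
entry.title     # => "Example post"
entry.published # => pubDate parsed into a Time by FeedEntryUtilities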
data/lib/feedzirra/version.rb
@@ -0,0 +1,3 @@
+ module Feedzirra
+   VERSION = '0.1.2'
+ end
data/spec/benchmarks/feed_benchmarks.rb
@@ -0,0 +1,98 @@
+ # this is some spike code to compare the speed of different methods for performing
+ # multiple feed fetches
+ require 'rubygems'
+ require 'curb'
+ require 'active_support'
+
+ require 'net/http'
+ require 'uri'
+
+ require 'benchmark'
+ include Benchmark
+
+ GET_COUNT = 1
+ urls = ["http://www.pauldix.net"] * GET_COUNT
+
+
+ benchmark do |t|
+   t.report("taf2-curb") do
+     multi = Curl::Multi.new
+     urls.each do |url|
+       easy = Curl::Easy.new(url) do |curl|
+         curl.headers["User-Agent"] = "feedzirra"
+         # curl.headers["If-Modified-Since"] = Time.now.httpdate
+         # curl.headers["If-None-Match"] = "ziEyTl4q9GH04BR4jgkImd0GvSE"
+         curl.follow_location = true
+         curl.on_success do |c|
+           # puts c.header_str.inspect
+           # puts c.response_code
+           # puts c.body_str.slice(0, 500)
+         end
+         curl.on_failure do |c|
+           puts "**** #{c.response_code}"
+         end
+       end
+       multi.add(easy)
+     end
+
+     multi.perform
+   end
+
+   t.report("nethttp") do
+     urls.each do |url|
+       res = Net::HTTP.get(URI.parse(url))
+       # puts res.slice(0, 500)
+     end
+   end
+
+   require 'rfuzz/session'
+   include RFuzz
+   t.report("rfuzz") do
+     GET_COUNT.times do
+       http = HttpClient.new("www.pauldix.net", 80)
+       response = http.get("/")
+       if response.http_status != "200"
+         puts "***** #{response.http_status}"
+       else
+         # puts response.http_status
+         # puts response.http_body.slice(0, 500)
+       end
+     end
+   end
+
+   require 'eventmachine'
+   t.report("eventmachine") do
+     counter = GET_COUNT
+     EM.run do
+       GET_COUNT.times do
+         http = EM::Protocols::HttpClient2.connect("www.pauldix.net", 80)
+         request = http.get("/")
+         request.callback do
+           # puts request.status
+           # puts request.content.slice(0, 500)
+           counter -= 1
+           EM.stop if counter == 0
+         end
+       end
+     end
+   end
+
+
+   require 'curl-multi'
+   t.report("curl multi") do
+     multi = Curl::Multi.new
+     urls.each do |url|
+       on_failure = lambda do |ex|
+         puts "****** Failed to retrieve #{url}"
+       end
+
+       on_success = lambda do |body|
+         # puts "got #{url}"
+         # puts body.slice(0, 500)
+       end
+       multi.get(url, on_success, on_failure)
+     end
+
+     multi.select([], []) while multi.size > 0
+   end
+ end
data/spec/benchmarks/feedzirra_benchmarks.rb
@@ -0,0 +1,40 @@
+ require File.dirname(__FILE__) + '/../../lib/feedzirra.rb'
+ require 'rfeedparser'
+ require 'feed-normalizer'
+ require 'open-uri'
+
+ require 'benchmark'
+ include Benchmark
+
+ iterations = 10
+ urls = File.readlines(File.dirname(__FILE__) + "/../sample_feeds/successful_feed_urls.txt").slice(0, 20)
+ puts "benchmarks on #{urls.size} feeds"
+ puts "************************************"
+ benchmark do |t|
+   t.report("feedzirra") do
+     iterations.times do
+       Feedzirra::Feed.fetch_and_parse(urls, :on_success => lambda { |url, feed| $stdout.print '.'; $stdout.flush })
+     end
+   end
+
+   t.report("rfeedparser") do
+     iterations.times do
+       urls.each do |url|
+         feed = FeedParser.parse(url)
+         $stdout.print '.'
+         $stdout.flush
+       end
+     end
+   end
+
+   t.report("feed-normalizer") do
+     iterations.times do
+       urls.each do |url|
+         # have to use the :force option to make feed-normalizer parse an atom feed
+         feed = FeedNormalizer::FeedNormalizer.parse(open(url), :force_parser => FeedNormalizer::SimpleRssParser)
+         $stdout.print '.'
+         $stdout.flush
+       end
+     end
+   end
+ end
data/spec/benchmarks/fetching_benchmarks.rb
@@ -0,0 +1,28 @@
+ require 'rubygems'
+ require File.dirname(__FILE__) + '/../../lib/feedzirra.rb'
+
+ require 'open-uri'
+
+ require 'benchmark'
+ include Benchmark
+
+ iterations = 10
+ urls = File.readlines(File.dirname(__FILE__) + "/../sample_feeds/successful_feed_urls.txt").slice(0, 20)
+ puts "benchmarks on #{urls.size} feeds"
+ puts "************************************"
+ benchmark do |t|
+   t.report("feedzirra open uri") do
+     iterations.times do
+       urls.each do |url|
+         Feedzirra::Feed.parse(open(url, "User-Agent" => "feedzirra http://github.com/pauldix/feedzirra/tree/master").read)
+         $stdout.print '.'; $stdout.flush
+       end
+     end
+   end
+
+   t.report("feedzirra fetch and parse") do
+     iterations.times do
+       Feedzirra::Feed.fetch_and_parse(urls, :on_success => lambda { |url, feed| $stdout.print '.'; $stdout.flush })
+     end
+   end
+ end
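The curb-backed fetcher being benchmarked here also supports conditional GETs, which the open-uri path does not. A hedged sketch of those options, based on the fetch_raw specs later in this changeset (the URL and the behavior on a 304 are assumptions for illustration):

require 'feedzirra'

url = "http://feeds.feedburner.com/PaulDixExplainsNothing"

xml = Feedzirra::Feed.fetch_raw(url,
  :user_agent        => "my-reader",
  :if_modified_since => Time.now - 3600,               # sent as If-Modified-Since
  :if_none_match     => "ziEyTl4q9GH04BR4jgkImd0GvSE", # sent as If-None-Match
  :compress          => true)                          # asks for gzip, deflate

# On a failed or not-modified response the value is not a document body,
# so only parse when we actually got XML back.
feed = Feedzirra::Feed.parse(xml) if xml.is_a?(String)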
data/spec/benchmarks/parsing_benchmark.rb
@@ -0,0 +1,30 @@
+ require File.dirname(__FILE__) + '/../../lib/feedzirra.rb'
+ require 'rfeedparser'
+ require 'feed-normalizer'
+
+ require 'benchmark'
+ include Benchmark
+
+ iterations = 50
+ xml = File.read(File.dirname(__FILE__) + '/../sample_feeds/PaulDixExplainsNothing.xml')
+
+ benchmark do |t|
+   t.report("feedzirra") do
+     iterations.times do
+       Feedzirra::Feed.parse(xml)
+     end
+   end
+
+   t.report("rfeedparser") do
+     iterations.times do
+       FeedParser.parse(xml)
+     end
+   end
+
+   t.report("feed-normalizer") do
+     iterations.times do
+       # have to use the :force option to make feed-normalizer parse an atom feed
+       FeedNormalizer::FeedNormalizer.parse(xml, :force_parser => FeedNormalizer::SimpleRssParser)
+     end
+   end
+ end
data/spec/benchmarks/updating_benchmarks.rb
@@ -0,0 +1,33 @@
+ require 'rubygems'
+ require File.dirname(__FILE__) + '/../../lib/feedzirra.rb'
+
+ require 'benchmark'
+ include Benchmark
+
+ urls = File.readlines(File.dirname(__FILE__) + "/../sample_feeds/successful_feed_urls.txt")
+ puts "benchmarks on #{urls.size} feeds"
+ puts "************************************"
+ benchmark do |t|
+   feeds = {}
+   t.report("feedzirra fetch and parse") do
+     feeds = Feedzirra::Feed.fetch_and_parse(urls,
+       :on_success => lambda { |url, feed| $stdout.print '.'; $stdout.flush },
+       :on_failure => lambda {|url, response_code, header, body| puts "#{response_code} ERROR on #{url}"})
+   end
+
+   # curb caches the dns lookups for 60 seconds. to make things fair we have to wait for the cache to expire
+   puts "sleeping to wait for dns cache to clear"
+   65.times {$stdout.print('.'); sleep(1)}
+   puts "done"
+
+   updated_feeds = []
+   t.report("feedzirra update") do
+     updated_feeds = Feedzirra::Feed.update(feeds.values.reject {|f| f.class == Fixnum},
+       :on_success => lambda {|feed| $stdout.print '.'; $stdout.flush},
+       :on_failure => lambda {|feed, response_code, header, body| puts "#{response_code} ERROR on #{feed.feed_url}"})
+   end
+
+   updated_feeds.each do |feed|
+     puts feed.feed_url if feed.updated?
+   end
+ end
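Roughly how this fetch-then-update cycle would look in application code, as a sketch rather than part of the benchmark; the two example URLs are the sample feeds used elsewhere in this changeset, and updated? and new_entries come from FeedUtilities.

require 'feedzirra'

urls = ["http://feeds.feedburner.com/PaulDixExplainsNothing",
        "http://feeds2.feedburner.com/trottercashion"]

# fetch_and_parse with an array returns a hash of url => feed (or status code on failure)
feeds = Feedzirra::Feed.fetch_and_parse(urls)

# ... later, re-check only what changed; reject the non-feed status codes first,
# as the benchmark above does
updated = Feedzirra::Feed.update(feeds.values.reject { |f| f.class == Fixnum })
updated.each do |feed|
  next unless feed.updated?
  feed.new_entries.each { |entry| puts "#{feed.title}: #{entry.title}" }
end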
data/spec/feedzirra/feed_entry_utilities_spec.rb
@@ -0,0 +1,52 @@
+ require 'spec_helper'
+
+ describe Feedzirra::FeedUtilities do
+   before(:each) do
+     @klass = Class.new do
+       include Feedzirra::FeedEntryUtilities
+     end
+   end
+
+   describe "handling dates" do
+     it "should parse an ISO 8601 formatted datetime into Time" do
+       time = @klass.new.parse_datetime("2008-02-20T8:05:00-010:00")
+       time.class.should == Time
+       time.to_s.should == "Wed Feb 20 18:05:00 UTC 2008"
+     end
+   end
+
+   describe "sanitizing" do
+     before(:each) do
+       @feed = Feedzirra::Feed.parse(sample_atom_feed)
+       @entry = @feed.entries.first
+     end
+
+     it "should provide a sanitized title" do
+       new_title = "<script>this is not safe</script>" + @entry.title
+       @entry.title = new_title
+       @entry.title.sanitize.should == Loofah.scrub_fragment(new_title, :prune).to_s
+     end
+
+     it "should sanitize content in place" do
+       new_content = "<script>" + @entry.content
+       @entry.content = new_content.dup
+       @entry.content.sanitize!.should == Loofah.scrub_fragment(new_content, :prune).to_s
+       @entry.content.should == Loofah.scrub_fragment(new_content, :prune).to_s
+     end
+
+     it "should sanitize things in place" do
+       @entry.title += "<script>"
+       @entry.author += "<script>"
+       @entry.content += "<script>"
+
+       cleaned_title = Loofah.scrub_fragment(@entry.title, :prune).to_s
+       cleaned_author = Loofah.scrub_fragment(@entry.author, :prune).to_s
+       cleaned_content = Loofah.scrub_fragment(@entry.content, :prune).to_s
+
+       @entry.sanitize!
+       @entry.title.should == cleaned_title
+       @entry.author.should == cleaned_author
+       @entry.content.should == cleaned_content
+     end
+   end
+ end
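A short sketch of the sanitization API these specs exercise, as it would be used against a fetched feed (the URL is only an example): sanitize returns a scrubbed copy, while the bang forms scrub in place using Loofah's :prune scrubber.

require 'feedzirra'

feed = Feedzirra::Feed.fetch_and_parse("http://feeds.feedburner.com/PaulDixExplainsNothing")
entry = feed.entries.first

safe_title = entry.title.sanitize # non-destructive, returns a scrubbed copy
entry.content.sanitize!           # scrubs the content string in place
entry.sanitize!                   # scrubs title, author and content in place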
data/spec/feedzirra/feed_spec.rb
@@ -0,0 +1,593 @@
+ require File.dirname(__FILE__) + '/../spec_helper'
+
+ describe Feedzirra::Feed do
+
+   describe "#add_common_feed_element" do
+     before(:all) do
+       Feedzirra::Feed.add_common_feed_element("generator")
+     end
+
+     it "should parse the added element out of Atom feeds" do
+       Feedzirra::Feed.parse(sample_wfw_feed).generator.should == "TypePad"
+     end
+
+     it "should parse the added element out of Atom Feedburner feeds" do
+       Feedzirra::Parser::Atom.new.should respond_to(:generator)
+     end
+
+     it "should parse the added element out of RSS feeds" do
+       Feedzirra::Parser::RSS.new.should respond_to(:generator)
+     end
+   end
+
+   describe "#add_common_feed_entry_element" do
+     before(:all) do
+       Feedzirra::Feed.add_common_feed_entry_element("wfw:commentRss", :as => :comment_rss)
+     end
+
+     it "should parse the added element out of Atom feeds entries" do
+       Feedzirra::Feed.parse(sample_wfw_feed).entries.first.comment_rss.should == "this is the new val"
+     end
+
+     it "should parse the added element out of Atom Feedburner feeds entries" do
+       Feedzirra::Parser::AtomEntry.new.should respond_to(:comment_rss)
+     end
+
+     it "should parse the added element out of RSS feeds entries" do
+       Feedzirra::Parser::RSSEntry.new.should respond_to(:comment_rss)
+     end
+   end
+
+   describe "#parse" do # many of these tests are redundant with the specific feed type tests, but I put them here for completeness
+     context "when there's an available parser" do
+       it "should parse an rdf feed" do
+         feed = Feedzirra::Feed.parse(sample_rdf_feed)
+         feed.title.should == "HREF Considered Harmful"
+         feed.entries.first.published.to_s.should == "Tue Sep 02 19:50:07 UTC 2008"
+         feed.entries.size.should == 10
+       end
+
+       it "should parse an rss feed" do
+         feed = Feedzirra::Feed.parse(sample_rss_feed)
+         feed.title.should == "Tender Lovemaking"
+         feed.entries.first.published.to_s.should == "Thu Dec 04 17:17:49 UTC 2008"
+         feed.entries.size.should == 10
+       end
+
+       it "should parse an atom feed" do
+         feed = Feedzirra::Feed.parse(sample_atom_feed)
+         feed.title.should == "Amazon Web Services Blog"
+         feed.entries.first.published.to_s.should == "Fri Jan 16 18:21:00 UTC 2009"
+         feed.entries.size.should == 10
+       end
+
+       it "should parse a feedburner atom feed" do
+         feed = Feedzirra::Feed.parse(sample_feedburner_atom_feed)
+         feed.title.should == "Paul Dix Explains Nothing"
+         feed.entries.first.published.to_s.should == "Thu Jan 22 15:50:22 UTC 2009"
+         feed.entries.size.should == 5
+       end
+
+       it "should parse an itunes feed as a standard RSS feed" do
+         feed = Feedzirra::Feed.parse(sample_itunes_feed)
+         feed.title.should == "All About Everything"
+         feed.entries.first.published.should == Time.parse("Wed, 15 Jun 2005 19:00:00 GMT")
+
+         # Since the commit 621957879, iTunes feeds will be parsed as standard RSS, so this
+         # entry should now not have a method for itunes_author.
+         feed.entries.first.should_not respond_to(:itunes_author)
+         feed.entries.size.should == 3
+       end
+     end
+
+     context "when there's no available parser" do
+       it "raises Feedzirra::NoParserAvailable" do
+         proc {
+           Feedzirra::Feed.parse("I'm an invalid feed")
+         }.should raise_error(Feedzirra::NoParserAvailable)
+       end
+     end
+
+     it "should parse a feedburner rss feed" do
+       feed = Feedzirra::Feed.parse(sample_rss_feed_burner_feed)
+       feed.title.should == "TechCrunch"
+       feed.entries.first.published.to_s.should == "Wed Nov 02 17:25:27 UTC 2011"
+       feed.entries.size.should == 20
+     end
+   end
+
+   describe "#determine_feed_parser_for_xml" do
+     it "should return the Feedzirra::Parser::Atom class for an atom feed" do
+       Feedzirra::Feed.determine_feed_parser_for_xml(sample_atom_feed).should == Feedzirra::Parser::Atom
+     end
+
+     it "should return the Feedzirra::Parser::AtomFeedBurner class for an atom feedburner feed" do
+       Feedzirra::Feed.determine_feed_parser_for_xml(sample_feedburner_atom_feed).should == Feedzirra::Parser::AtomFeedBurner
+     end
+
+     it "should return the Feedzirra::Parser::RSS class for an rdf/rss 1.0 feed" do
+       Feedzirra::Feed.determine_feed_parser_for_xml(sample_rdf_feed).should == Feedzirra::Parser::RSS
+     end
+
+     it "should return the Feedzirra::Parser::RSSFeedBurner class for an rss feedburner feed" do
+       Feedzirra::Feed.determine_feed_parser_for_xml(sample_rss_feed_burner_feed).should == Feedzirra::Parser::RSSFeedBurner
+     end
+
+     it "should return the Feedzirra::Parser::RSS object for an rss 2.0 feed" do
+       Feedzirra::Feed.determine_feed_parser_for_xml(sample_rss_feed).should == Feedzirra::Parser::RSS
+     end
+
+     it "should return a Feedzirra::Parser::RSS object for an itunes feed" do
+       Feedzirra::Feed.determine_feed_parser_for_xml(sample_itunes_feed).should == Feedzirra::Parser::RSS
+     end
+
+   end
+
+   describe "when adding feed types" do
+     it "should prioritize added types over the built in ones" do
+       feed_text = "Atom asdf"
+       Feedzirra::Parser::Atom.stub!(:able_to_parse?).and_return(true)
+       new_feed_type = Class.new do
+         def self.able_to_parse?(val)
+           true
+         end
+       end
+
+       new_feed_type.should be_able_to_parse(feed_text)
+       Feedzirra::Feed.add_feed_class(new_feed_type)
+       Feedzirra::Feed.determine_feed_parser_for_xml(feed_text).should == new_feed_type
+
+       # this is a hack so that this doesn't break the rest of the tests
+       Feedzirra::Feed.feed_classes.reject! {|o| o == new_feed_type }
+     end
+   end
+
+   describe '#etag_from_header' do
+     before(:each) do
+       @header = "HTTP/1.0 200 OK\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\nETag: ziEyTl4q9GH04BR4jgkImd0GvSE\r\nP3P: CP=\"ALL DSP COR NID CUR OUR NOR\"\r\nConnection: close\r\nContent-Type: text/xml;charset=utf-8\r\n\r\n"
+     end
+
+     it "should return the etag from the header if it exists" do
+       Feedzirra::Feed.etag_from_header(@header).should == "ziEyTl4q9GH04BR4jgkImd0GvSE"
+     end
+
+     it "should return nil if there is no etag in the header" do
+       Feedzirra::Feed.etag_from_header("foo").should be_nil
+     end
+
+   end
+
+   describe '#last_modified_from_header' do
+     before(:each) do
+       @header = "HTTP/1.0 200 OK\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\nETag: ziEyTl4q9GH04BR4jgkImd0GvSE\r\nP3P: CP=\"ALL DSP COR NID CUR OUR NOR\"\r\nConnection: close\r\nContent-Type: text/xml;charset=utf-8\r\n\r\n"
+     end
+
+     it "should return the last modified date from the header if it exists" do
+       Feedzirra::Feed.last_modified_from_header(@header).should == Time.parse("Wed, 28 Jan 2009 04:10:32 GMT")
+     end
+
+     it "should return nil if there is no last modified date in the header" do
+       Feedzirra::Feed.last_modified_from_header("foo").should be_nil
+     end
+   end
+
+   describe "fetching feeds" do
+     before(:each) do
+       @paul_feed = { :xml => load_sample("PaulDixExplainsNothing.xml"), :url => "http://feeds.feedburner.com/PaulDixExplainsNothing" }
+       @trotter_feed = { :xml => load_sample("TrotterCashionHome.xml"), :url => "http://feeds2.feedburner.com/trottercashion" }
+     end
+
+     describe "#fetch_raw" do
+       before(:each) do
+         @cmock = stub('cmock', :header_str => '', :body_str => @paul_feed[:xml] )
+         @multi = stub('curl_multi', :add => true, :perform => true)
+         @curl_easy = stub('curl_easy')
+         @curl = stub('curl', :headers => {}, :follow_location= => true, :on_failure => true)
+         @curl.stub!(:on_success).and_yield(@cmock)
+
+         Curl::Multi.stub!(:new).and_return(@multi)
+         Curl::Easy.stub!(:new).and_yield(@curl).and_return(@curl_easy)
+       end
+
+       it "should set user agent if it's passed as an option" do
+         Feedzirra::Feed.fetch_raw(@paul_feed[:url], :user_agent => 'Custom Useragent')
+         @curl.headers['User-Agent'].should == 'Custom Useragent'
+       end
+
+       it "should set user agent to default if it's not passed as an option" do
+         Feedzirra::Feed.fetch_raw(@paul_feed[:url])
+         @curl.headers['User-Agent'].should == Feedzirra::Feed::USER_AGENT
+       end
+
+       it "should set if modified since as an option if passed" do
+         Feedzirra::Feed.fetch_raw(@paul_feed[:url], :if_modified_since => Time.parse("Wed, 28 Jan 2009 04:10:32 GMT"))
+         @curl.headers["If-Modified-Since"].should == 'Wed, 28 Jan 2009 04:10:32 GMT'
+       end
+
+       it "should set if none match as an option if passed" do
+         Feedzirra::Feed.fetch_raw(@paul_feed[:url], :if_none_match => 'ziEyTl4q9GH04BR4jgkImd0GvSE')
+         @curl.headers["If-None-Match"].should == 'ziEyTl4q9GH04BR4jgkImd0GvSE'
+       end
+
+       it 'should set userpwd for http basic authentication if :http_authentication is passed' do
+         @curl.should_receive(:userpwd=).with('username:password')
+         Feedzirra::Feed.fetch_raw(@paul_feed[:url], :http_authentication => ['username', 'password'])
+       end
+
+       it 'should set accepted encodings' do
+         Feedzirra::Feed.fetch_raw(@paul_feed[:url], :compress => true)
+         @curl.headers["Accept-encoding"].should == 'gzip, deflate'
+       end
+
+       it "should return raw xml" do
+         Feedzirra::Feed.fetch_raw(@paul_feed[:url]).should =~ /^#{Regexp.escape('<?xml version="1.0" encoding="UTF-8"?>')}/
+       end
+
+       it "should take multiple feed urls and return a hash of urls and response xml" do
+         multi = stub('curl_multi', :add => true, :perform => true)
+         Curl::Multi.stub!(:new).and_return(multi)
+
+         paul_response = stub('paul_response', :header_str => '', :body_str => @paul_feed[:xml] )
+         trotter_response = stub('trotter_response', :header_str => '', :body_str => @trotter_feed[:xml] )
+
+         paul_curl = stub('paul_curl', :headers => {}, :follow_location= => true, :on_failure => true)
+         paul_curl.stub!(:on_success).and_yield(paul_response)
+
+         trotter_curl = stub('trotter_curl', :headers => {}, :follow_location= => true, :on_failure => true)
+         trotter_curl.stub!(:on_success).and_yield(trotter_response)
+
+         Curl::Easy.should_receive(:new).with(@paul_feed[:url]).ordered.and_yield(paul_curl)
+         Curl::Easy.should_receive(:new).with(@trotter_feed[:url]).ordered.and_yield(trotter_curl)
+
+         results = Feedzirra::Feed.fetch_raw([@paul_feed[:url], @trotter_feed[:url]])
+         results.keys.should include(@paul_feed[:url])
+         results.keys.should include(@trotter_feed[:url])
+         results[@paul_feed[:url]].should =~ /Paul Dix/
+         results[@trotter_feed[:url]].should =~ /Trotter Cashion/
+       end
+
+       it "should always return a hash when passed an array" do
+         results = Feedzirra::Feed.fetch_raw([@paul_feed[:url]])
+         results.class.should == Hash
+       end
+     end
+
+     describe "#add_url_to_multi" do
+       before(:each) do
+         @multi = Curl::Multi.get(@paul_feed[:url])
+         @multi.stub!(:add)
+         @easy_curl = Curl::Easy.new(@paul_feed[:url])
+
+         Curl::Easy.should_receive(:new).and_yield(@easy_curl)
+       end
+
+       it "should set user agent if it's passed as an option" do
+         Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, :user_agent => 'My cool application')
+         @easy_curl.headers["User-Agent"].should == 'My cool application'
+       end
+
+       it "should set user agent to default if it's not passed as an option" do
+         Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
+         @easy_curl.headers["User-Agent"].should == Feedzirra::Feed::USER_AGENT
+       end
+
+       it "should set if modified since as an option if passed" do
+         Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, :if_modified_since => Time.parse("Jan 25 2009 04:10:32 GMT"))
+         @easy_curl.headers["If-Modified-Since"].should == 'Sun, 25 Jan 2009 04:10:32 GMT'
+       end
+
+       it 'should set follow location to true' do
+         @easy_curl.should_receive(:follow_location=).with(true)
+         Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
+       end
+
+       it 'should set userpwd for http basic authentication if :http_authentication is passed' do
+         Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, :http_authentication => ['myusername', 'mypassword'])
+         @easy_curl.userpwd.should == 'myusername:mypassword'
+       end
+
+       it 'should set accepted encodings' do
+         Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {:compress => true})
+         @easy_curl.headers["Accept-encoding"].should == 'gzip, deflate'
+       end
+
+       it "should set if_none_match as an option if passed" do
+         Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, :if_none_match => 'ziEyTl4q9GH04BR4jgkImd0GvSE')
+         @easy_curl.headers["If-None-Match"].should == 'ziEyTl4q9GH04BR4jgkImd0GvSE'
+       end
+
+       describe 'on success' do
+         before(:each) do
+           @feed = mock('feed', :feed_url= => true, :etag= => true, :last_modified= => true)
+           Feedzirra::Feed.stub!(:decode_content).and_return(@paul_feed[:xml])
+           Feedzirra::Feed.stub!(:determine_feed_parser_for_xml).and_return(Feedzirra::Parser::AtomFeedBurner)
+           Feedzirra::Parser::AtomFeedBurner.stub!(:parse).and_return(@feed)
+           Feedzirra::Feed.stub!(:etag_from_header).and_return('ziEyTl4q9GH04BR4jgkImd0GvSE')
+           Feedzirra::Feed.stub!(:last_modified_from_header).and_return('Wed, 28 Jan 2009 04:10:32 GMT')
+         end
+
+         it 'should decode the response body' do
+           Feedzirra::Feed.should_receive(:decode_content).with(@easy_curl).and_return(@paul_feed[:xml])
+           Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
+           @easy_curl.on_success.call(@easy_curl)
+         end
+
+         it 'should determine the xml parser class' do
+           Feedzirra::Feed.should_receive(:determine_feed_parser_for_xml).with(@paul_feed[:xml]).and_return(Feedzirra::Parser::AtomFeedBurner)
+           Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
+           @easy_curl.on_success.call(@easy_curl)
+         end
+
+         it 'should parse the xml' do
+           Feedzirra::Parser::AtomFeedBurner.should_receive(:parse).with(@paul_feed[:xml], an_instance_of(Proc)).and_return(@feed)
+           Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
+           @easy_curl.on_success.call(@easy_curl)
+         end
+
+         describe 'when a compatible xml parser class is found' do
+           it 'should set the last effective url to the feed url' do
+             @easy_curl.should_receive(:last_effective_url).and_return(@paul_feed[:url])
+             @feed.should_receive(:feed_url=).with(@paul_feed[:url])
+             Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
+             @easy_curl.on_success.call(@easy_curl)
+           end
+
+           it 'should set the etags on the feed' do
+             @feed.should_receive(:etag=).with('ziEyTl4q9GH04BR4jgkImd0GvSE')
+             Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
+             @easy_curl.on_success.call(@easy_curl)
+           end
+
+           it 'should set the last modified on the feed' do
+             @feed.should_receive(:last_modified=).with('Wed, 28 Jan 2009 04:10:32 GMT')
+             Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
+             @easy_curl.on_success.call(@easy_curl)
+           end
+
+           it 'should add the feed to the responses' do
+             responses = {}
+             Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], responses, {})
+             @easy_curl.on_success.call(@easy_curl)
+
+             responses.length.should == 1
+             responses['http://feeds.feedburner.com/PaulDixExplainsNothing'].should == @feed
+           end
+
+           it 'should call proc if :on_success option is passed' do
+             success = lambda { |url, feed| }
+             success.should_receive(:call).with(@paul_feed[:url], @feed)
+             Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, { :on_success => success })
+             @easy_curl.on_success.call(@easy_curl)
+           end
+         end
+
+         describe 'when no compatible xml parser class is found' do
+           it 'should raise a NoParserAvailable exception'
+         end
+       end
+
+       describe 'on failure' do
+         before(:each) do
+           @headers = "HTTP/1.0 404 Not Found\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\n"
+           @body = 'Page could not be found.'
+
+           @easy_curl.stub!(:response_code).and_return(404)
+           @easy_curl.stub!(:header_str).and_return(@headers)
+           @easy_curl.stub!(:body_str).and_return(@body)
+         end
+
+         it 'should call proc if :on_failure option is passed' do
+           failure = lambda { |url, feed| }
+           failure.should_receive(:call).with(@paul_feed[:url], 404, @headers, @body)
+           Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, { :on_failure => failure })
+           @easy_curl.on_failure.call(@easy_curl)
+         end
+
+         it 'should return the http code in the responses' do
+           responses = {}
+           Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], responses, {})
+           @easy_curl.on_failure.call(@easy_curl)
+
+           responses.length.should == 1
+           responses[@paul_feed[:url]].should == 404
+         end
+       end
+     end
+
+     describe "#add_feed_to_multi" do
+       before(:each) do
+         @multi = Curl::Multi.get(@paul_feed[:url])
+         @multi.stub!(:add)
+         @easy_curl = Curl::Easy.new(@paul_feed[:url])
+         @feed = Feedzirra::Feed.parse(sample_feedburner_atom_feed)
+
+         Curl::Easy.should_receive(:new).and_yield(@easy_curl)
+       end
+
+       it "should set user agent if it's passed as an option" do
+         Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], {}, :user_agent => 'My cool application')
+         @easy_curl.headers["User-Agent"].should == 'My cool application'
+       end
+
+       it "should set user agent to default if it's not passed as an option" do
+         Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
+         @easy_curl.headers["User-Agent"].should == Feedzirra::Feed::USER_AGENT
+       end
+
+       it "should set if modified since as an option if passed" do
+         modified_time = Time.parse("Wed, 28 Jan 2009 04:10:32 GMT")
+         Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], {}, {:if_modified_since => modified_time})
+         modified_time.should be > @feed.last_modified
+
+         @easy_curl.headers["If-Modified-Since"].should == modified_time
+       end
+
+       it 'should set follow location to true' do
+         @easy_curl.should_receive(:follow_location=).with(true)
+         Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
+       end
+
+       it 'should set userpwd for http basic authentication if :http_authentication is passed' do
+         Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], {}, :http_authentication => ['myusername', 'mypassword'])
+         @easy_curl.userpwd.should == 'myusername:mypassword'
+       end
+
+       it "should set if_none_match as an option if passed" do
+         @feed.etag = 'ziEyTl4q9GH04BR4jgkImd0GvSE'
+         Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
+         @easy_curl.headers["If-None-Match"].should == 'ziEyTl4q9GH04BR4jgkImd0GvSE'
+       end
+
+       describe 'on success' do
+         before(:each) do
+           @new_feed = @feed.clone
+           @feed.stub!(:update_from_feed)
+           Feedzirra::Feed.stub!(:decode_content).and_return(@paul_feed[:xml])
+           Feedzirra::Feed.stub!(:determine_feed_parser_for_xml).and_return(Feedzirra::Parser::AtomFeedBurner)
+           Feedzirra::Parser::AtomFeedBurner.stub!(:parse).and_return(@new_feed)
+           Feedzirra::Feed.stub!(:etag_from_header).and_return('ziEyTl4q9GH04BR4jgkImd0GvSE')
+           Feedzirra::Feed.stub!(:last_modified_from_header).and_return('Wed, 28 Jan 2009 04:10:32 GMT')
+         end
+
+         it 'should process the next feed in the queue'
+
+         it 'should parse the updated feed' do
+           Feedzirra::Parser::AtomFeedBurner.should_receive(:parse).and_return(@new_feed)
+           Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
+           @easy_curl.on_success.call(@easy_curl)
+         end
+
+         it 'should set the last effective url to the feed url' do
+           @easy_curl.should_receive(:last_effective_url).and_return(@paul_feed[:url])
+           @new_feed.should_receive(:feed_url=).with(@paul_feed[:url])
+           Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
+           @easy_curl.on_success.call(@easy_curl)
+         end
+
+         it 'should set the etags on the feed' do
+           @new_feed.should_receive(:etag=).with('ziEyTl4q9GH04BR4jgkImd0GvSE')
+           Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
+           @easy_curl.on_success.call(@easy_curl)
+         end
+
+         it 'should set the last modified on the feed' do
+           @new_feed.should_receive(:last_modified=).with('Wed, 28 Jan 2009 04:10:32 GMT')
+           Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
+           @easy_curl.on_success.call(@easy_curl)
+         end
+
+         it 'should add the feed to the responses' do
+           responses = {}
+           Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], responses, {})
+           @easy_curl.on_success.call(@easy_curl)
+
+           responses.length.should == 1
+           responses['http://feeds.feedburner.com/PaulDixExplainsNothing'].should == @feed
+         end
+
+         it 'should call proc if :on_success option is passed' do
+           success = lambda { |feed| }
+           success.should_receive(:call).with(@feed)
+           Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], {}, { :on_success => success })
+           @easy_curl.on_success.call(@easy_curl)
+         end
+
+         it 'should call update from feed on the old feed with the updated feed' do
+           @feed.should_receive(:update_from_feed).with(@new_feed)
+           Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
+           @easy_curl.on_success.call(@easy_curl)
+         end
+       end
+
+       describe 'on failure' do
+         before(:each) do
+           @headers = "HTTP/1.0 404 Not Found\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\n"
+           @body = 'Page could not be found.'
+
+           @easy_curl.stub!(:response_code).and_return(404)
+           @easy_curl.stub!(:header_str).and_return(@headers)
+           @easy_curl.stub!(:body_str).and_return(@body)
+         end
+
+         it 'should call on success callback if the response code is 304' do
+           success = lambda { |feed| }
+           success.should_receive(:call).with(@feed)
+           @easy_curl.should_receive(:response_code).and_return(304)
+           Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], {}, { :on_success => success })
+           @easy_curl.on_failure.call(@easy_curl)
+         end
+
+         it 'should return the http code in the responses' do
+           responses = {}
+           Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], responses, {})
+           @easy_curl.on_failure.call(@easy_curl)
+
+           responses.length.should == 1
+           responses['http://www.pauldix.net/'].should == 404
+         end
+       end
+     end
+
+     describe "#fetch_and_parse" do
+       it 'should initiate the fetching and parsing using multicurl'
+       it "should pass any request options through to add_url_to_multi"
+       it 'should slice the feeds into groups of thirty for processing'
+       it "should return a feed object if a single feed is passed in"
+       it "should return an array of feed objects if multiple feeds are passed in"
+
+       it "should set if modified since as an option if passed" do
+         modified_time = Time.parse("Wed, 28 Jan 2009 04:10:32 GMT")
+         Feedzirra::Feed.should_receive(:add_url_to_multi).with(anything, anything, anything, anything, {:if_modified_since => modified_time}).any_number_of_times
+
+         @feed = Feedzirra::Feed.fetch_and_parse(sample_feedburner_atom_feed, {:if_modified_since => modified_time})
+       end
+
+     end
+
+     describe "#decode_content" do
+       before(:each) do
+         @curl_easy = mock('curl_easy', :body_str => '<xml></xml>')
+       end
+
+       it 'should decode the response body using gzip if the Content-Encoding: is gzip' do
+         @curl_easy.stub!(:header_str).and_return('Content-Encoding: gzip')
+         string_io = mock('stringio', :read => @curl_easy.body_str, :close => true)
+         StringIO.should_receive(:new).and_return(string_io)
+         Zlib::GzipReader.should_receive(:new).with(string_io).and_return(string_io)
+         Feedzirra::Feed.decode_content(@curl_easy)
+       end
+
+       it 'should decode the response body using gzip if the Content-Encoding: is gzip even when the case is wrong' do
+         @curl_easy.stub!(:header_str).and_return('content-encoding: gzip')
+         string_io = mock('stringio', :read => @curl_easy.body_str, :close => true)
+         StringIO.should_receive(:new).and_return(string_io)
+         Zlib::GzipReader.should_receive(:new).with(string_io).and_return(string_io)
+         Feedzirra::Feed.decode_content(@curl_easy)
+       end
+
+       it 'should deflate the response body using inflate if the Content-Encoding: is deflate' do
+         @curl_easy.stub!(:header_str).and_return('Content-Encoding: deflate')
+         Zlib::Inflate.should_receive(:inflate).with(@curl_easy.body_str)
+         Feedzirra::Feed.decode_content(@curl_easy)
+       end
+
+       it 'should deflate the response body using inflate if the Content-Encoding: is deflate even if the case is wrong' do
+         @curl_easy.stub!(:header_str).and_return('content-encoding: deflate')
+         Zlib::Inflate.should_receive(:inflate).with(@curl_easy.body_str)
+         Feedzirra::Feed.decode_content(@curl_easy)
+       end
+
+       it 'should return the response body if it is not encoded' do
+         @curl_easy.stub!(:header_str).and_return('')
+         Feedzirra::Feed.decode_content(@curl_easy).should == '<xml></xml>'
+       end
+     end
+
+     describe "#update" do
+       it 'should perform the updating using multicurl'
+       it "should pass any request options through to add_feed_to_multi"
+       it "should return a feed object if a single feed is passed in"
+       it "should return an array of feed objects if multiple feeds are passed in"
+     end
+   end
+ end
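For reference, the extension API covered at the top of this spec, as it would be used in an application (a sketch; the feed URL is only an example). Both calls register the new element on every bundled parser and should run before any feeds are parsed.

require 'feedzirra'

Feedzirra::Feed.add_common_feed_element("generator")
Feedzirra::Feed.add_common_feed_entry_element("wfw:commentRss", :as => :comment_rss)

feed = Feedzirra::Feed.fetch_and_parse("http://feeds.feedburner.com/PaulDixExplainsNothing")
feed.generator                 # feed-level element added to the feed parsers
feed.entries.first.comment_rss # entry-level element added to the entry parsers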