codders-feedzirra 0.2.0.rc2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +12 -0
- data/.rspec +1 -0
- data/.travis.yml +9 -0
- data/Gemfile +10 -0
- data/Guardfile +6 -0
- data/HISTORY.md +25 -0
- data/README.md +179 -0
- data/Rakefile +6 -0
- data/feedzirra.gemspec +28 -0
- data/lib/feedzirra.rb +17 -0
- data/lib/feedzirra/core_ext.rb +3 -0
- data/lib/feedzirra/core_ext/date.rb +19 -0
- data/lib/feedzirra/core_ext/string.rb +9 -0
- data/lib/feedzirra/core_ext/time.rb +29 -0
- data/lib/feedzirra/feed.rb +382 -0
- data/lib/feedzirra/feed_entry_utilities.rb +65 -0
- data/lib/feedzirra/feed_utilities.rb +72 -0
- data/lib/feedzirra/parser.rb +20 -0
- data/lib/feedzirra/parser/atom.rb +29 -0
- data/lib/feedzirra/parser/atom_entry.rb +30 -0
- data/lib/feedzirra/parser/atom_feed_burner.rb +21 -0
- data/lib/feedzirra/parser/atom_feed_burner_entry.rb +31 -0
- data/lib/feedzirra/parser/google_docs_atom.rb +28 -0
- data/lib/feedzirra/parser/google_docs_atom_entry.rb +29 -0
- data/lib/feedzirra/parser/itunes_rss.rb +50 -0
- data/lib/feedzirra/parser/itunes_rss_item.rb +32 -0
- data/lib/feedzirra/parser/itunes_rss_owner.rb +12 -0
- data/lib/feedzirra/parser/rss.rb +22 -0
- data/lib/feedzirra/parser/rss_entry.rb +34 -0
- data/lib/feedzirra/parser/rss_feed_burner.rb +22 -0
- data/lib/feedzirra/parser/rss_feed_burner_entry.rb +40 -0
- data/lib/feedzirra/version.rb +3 -0
- data/spec/benchmarks/feed_benchmarks.rb +98 -0
- data/spec/benchmarks/feedzirra_benchmarks.rb +40 -0
- data/spec/benchmarks/fetching_benchmarks.rb +28 -0
- data/spec/benchmarks/parsing_benchmark.rb +30 -0
- data/spec/benchmarks/updating_benchmarks.rb +33 -0
- data/spec/feedzirra/feed_entry_utilities_spec.rb +52 -0
- data/spec/feedzirra/feed_spec.rb +599 -0
- data/spec/feedzirra/feed_utilities_spec.rb +150 -0
- data/spec/feedzirra/parser/atom_entry_spec.rb +86 -0
- data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +47 -0
- data/spec/feedzirra/parser/atom_feed_burner_spec.rb +47 -0
- data/spec/feedzirra/parser/atom_spec.rb +51 -0
- data/spec/feedzirra/parser/google_docs_atom_entry_spec.rb +22 -0
- data/spec/feedzirra/parser/google_docs_atom_spec.rb +31 -0
- data/spec/feedzirra/parser/itunes_rss_item_spec.rb +48 -0
- data/spec/feedzirra/parser/itunes_rss_owner_spec.rb +18 -0
- data/spec/feedzirra/parser/itunes_rss_spec.rb +54 -0
- data/spec/feedzirra/parser/rss_entry_spec.rb +85 -0
- data/spec/feedzirra/parser/rss_feed_burner_entry_spec.rb +85 -0
- data/spec/feedzirra/parser/rss_feed_burner_spec.rb +52 -0
- data/spec/feedzirra/parser/rss_spec.rb +49 -0
- data/spec/sample_feeds/AmazonWebServicesBlog.xml +796 -0
- data/spec/sample_feeds/AmazonWebServicesBlogFirstEntryContent.xml +63 -0
- data/spec/sample_feeds/FeedBurnerUrlNoAlternate.xml +27 -0
- data/spec/sample_feeds/GoogleDocsList.xml +187 -0
- data/spec/sample_feeds/HREFConsideredHarmful.xml +313 -0
- data/spec/sample_feeds/HREFConsideredHarmfulFirstEntry.xml +22 -0
- data/spec/sample_feeds/PaulDixExplainsNothing.xml +174 -0
- data/spec/sample_feeds/PaulDixExplainsNothingAlternate.xml +174 -0
- data/spec/sample_feeds/PaulDixExplainsNothingFirstEntryContent.xml +19 -0
- data/spec/sample_feeds/PaulDixExplainsNothingWFW.xml +174 -0
- data/spec/sample_feeds/TechCrunch.xml +1514 -0
- data/spec/sample_feeds/TechCrunchFirstEntry.xml +9 -0
- data/spec/sample_feeds/TechCrunchFirstEntryDescription.xml +3 -0
- data/spec/sample_feeds/TenderLovemaking.xml +515 -0
- data/spec/sample_feeds/TenderLovemakingFirstEntry.xml +66 -0
- data/spec/sample_feeds/TrotterCashionHome.xml +610 -0
- data/spec/sample_feeds/atom_with_link_tag_for_url_unmarked.xml +30 -0
- data/spec/sample_feeds/itunes.xml +60 -0
- data/spec/sample_feeds/run_against_sample.rb +20 -0
- data/spec/sample_feeds/top5kfeeds.dat +2170 -0
- data/spec/sample_feeds/trouble_feeds.txt +16 -0
- data/spec/spec_helper.rb +75 -0
- metadata +203 -0
@@ -0,0 +1,34 @@
|
|
1
|
+
module Feedzirra
  module Parser
    # Parser for dealing with RSS/RDF feed entries.
    #
    # Each SAXMachine declaration below maps an XML tag onto an entry
    # attribute. Several tags may feed the same attribute (for example the
    # various date tags all populate +published+).
    class RSSEntry
      include SAXMachine
      include FeedEntryUtilities

      element :title
      element :link, :as => :url

      # Author, from either the Dublin Core creator tag or a plain <author>.
      [:'dc:creator', :author].each { |tag| element tag, :as => :author }

      element :'content:encoded', :as => :content
      element :description, :as => :summary

      # Tags that populate +published+, declared in their original order.
      [:pubDate, :pubdate, :'dc:date', :'dc:Date', :'dcterms:created'].each do |tag|
        element tag, :as => :published
      end

      element :'dcterms:modified', :as => :updated
      element :issued, :as => :published

      # Every <category> tag is collected into the +categories+ list.
      elements :category, :as => :categories

      element :guid, :as => :entry_id
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Feedzirra
  module Parser
    # Parser for dealing with RSS feeds whose XML mentions "feedburner"
    # (see .able_to_parse?). Items are parsed with RSSFeedBurnerEntry.
    class RSSFeedBurner
      include SAXMachine
      include FeedUtilities

      element :title
      element :description
      element :link, :as => :url
      elements :item, :as => :entries, :class => RSSFeedBurnerEntry

      attr_accessor :feed_url

      class << self
        # Returns a truthy value (the match offset of "feedburner") when +xml+
        # contains "<rss" or "<rdf" and also contains "feedburner"; otherwise
        # returns nil.
        def able_to_parse?(xml) #:nodoc:
          return nil unless /\<rss|\<rdf/ =~ xml

          /feedburner/ =~ xml
        end
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Feedzirra
  module Parser
    # Parser for dealing with entries of RSS feeds served through FeedBurner.
    #
    # Each SAXMachine declaration below maps an XML tag onto an entry
    # attribute. Both <feedburner:origLink> and <link> are mapped to +url+.
    class RSSFeedBurnerEntry
      include SAXMachine
      include FeedEntryUtilities

      element :title

      # Tags that populate +url+, declared in their original order.
      [:'feedburner:origLink', :link].each { |tag| element tag, :as => :url }

      # Author, from either the Dublin Core creator tag or a plain <author>.
      [:'dc:creator', :author].each { |tag| element tag, :as => :author }

      element :'content:encoded', :as => :content
      element :description, :as => :summary

      # Tags that populate +published+, declared in their original order.
      [:pubDate, :pubdate, :'dc:date', :'dc:Date', :'dcterms:created'].each do |tag|
        element tag, :as => :published
      end

      element :'dcterms:modified', :as => :updated
      element :issued, :as => :published

      # Every <category> tag is collected into the +categories+ list.
      elements :category, :as => :categories

      element :guid, :as => :entry_id

      # Returns the parsed url, falling back to @link when no url was set.
      # NOTE(review): none of the declarations in this class assign @link
      # (<link> is mapped to +url+) -- confirm whether the fallback is still
      # reachable.
      def url
        return @url if @url

        @link
      end
    end
  end
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# Spike code that compares the speed of different HTTP client libraries when
# performing multiple feed fetches. Each report below issues GET_COUNT requests
# against the same site with one library.
require 'rubygems'
require 'curb'
require 'active_support'

require 'net/http'
require 'uri'

require 'benchmark'
include Benchmark

GET_COUNT = 1
urls = Array.new(GET_COUNT, "http://www.pauldix.net")

benchmark do |t|
  # curb: queue one easy handle per url on a multi handle, then run them all.
  t.report("taf2-curb") do
    multi = Curl::Multi.new
    urls.each do |url|
      handle = Curl::Easy.new(url) do |curl|
        curl.headers["User-Agent"] = "feedzirra"
        # curl.headers["If-Modified-Since"] = Time.now.httpdate
        # curl.headers["If-None-Match"] = "ziEyTl4q9GH04BR4jgkImd0GvSE"
        curl.follow_location = true
        curl.on_success do |c|
          # puts c.header_str.inspect
          # puts c.response_code
          # puts c.body_str.slice(0, 500)
        end
        curl.on_failure { |c| puts "**** #{c.response_code}" }
      end
      multi.add(handle)
    end

    multi.perform
  end

  # Net::HTTP: one blocking GET per url.
  t.report("nethttp") do
    urls.each do |url|
      Net::HTTP.get(URI.parse(url))
      # puts res.slice(0, 500)
    end
  end

  # rfuzz: one blocking GET per iteration; only non-200 statuses are printed.
  require 'rfuzz/session'
  include RFuzz
  t.report("rfuzz") do
    GET_COUNT.times do
      client = HttpClient.new("www.pauldix.net", 80)
      response = client.get("/")
      # On success the original debugging output was:
      # puts response.http_status
      # puts response.http_body.slice(0, 500)
      puts "***** #{response.http_status}" unless response.http_status == "200"
    end
  end

  # eventmachine: start every request inside the reactor and stop the reactor
  # once the last callback has fired.
  require 'eventmachine'
  t.report("eventmachine") do
    remaining = GET_COUNT
    EM.run do
      GET_COUNT.times do
        connection = EM::Protocols::HttpClient2.connect("www.pauldix.net", 80)
        request = connection.get("/")
        request.callback do
          # puts request.status
          # puts request.content.slice(0, 500)
          remaining -= 1
          EM.stop if remaining == 0
        end
      end
    end
  end

  # curl-multi: register success/failure lambdas per url, then keep calling
  # select until the multi handle reports no outstanding requests.
  require 'curl-multi'
  t.report("curl multi") do
    multi = Curl::Multi.new
    urls.each do |url|
      on_failure = lambda { |ex| puts "****** Failed to retrieve #{url}" }

      on_success = lambda do |body|
        # puts "got #{url}"
        # puts body.slice(0, 500)
      end
      multi.get(url, on_success, on_failure)
    end

    while multi.size > 0
      multi.select([], [])
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# Benchmark comparing feedzirra, rfeedparser and feed-normalizer when fetching
# and parsing the first 20 urls listed in successful_feed_urls.txt.
require File.dirname(__FILE__) + '/../../lib/feedzirra.rb'
require 'rfeedparser'
require 'feed-normalizer'
require 'open-uri'

require 'benchmark'
include Benchmark

iterations = 10
url_list = File.dirname(__FILE__) + "/../sample_feeds/successful_feed_urls.txt"
urls = File.readlines(url_list).first(20)

# Prints a progress dot immediately.
tick = lambda { $stdout.print '.'; $stdout.flush }

puts "benchmarks on #{urls.size} feeds"
puts "************************************"
benchmark do |t|
  t.report("feedzirra") do
    iterations.times do
      Feedzirra::Feed.fetch_and_parse(urls, :on_success => lambda { |url, feed| tick.call })
    end
  end

  t.report("rfeedparser") do
    iterations.times do
      urls.each do |url|
        FeedParser.parse(url)
        tick.call
      end
    end
  end

  t.report("feed-normalizer") do
    iterations.times do
      urls.each do |url|
        # have to use the :force_parser option to make feed-normalizer parse an atom feed
        FeedNormalizer::FeedNormalizer.parse(open(url), :force_parser => FeedNormalizer::SimpleRssParser)
        tick.call
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Benchmark comparing two ways of fetching the first 20 urls listed in
# successful_feed_urls.txt: open-uri followed by Feedzirra::Feed.parse, versus
# Feedzirra::Feed.fetch_and_parse.
require 'rubygems'
require File.dirname(__FILE__) + '/../../lib/feedzirra.rb'

require 'open-uri'

require 'benchmark'
include Benchmark

iterations = 10
url_list = File.dirname(__FILE__) + "/../sample_feeds/successful_feed_urls.txt"
urls = File.readlines(url_list).first(20)
user_agent = "feedzirra http://github.com/pauldix/feedzirra/tree/master"

# Prints a progress dot immediately.
tick = lambda { $stdout.print '.'; $stdout.flush }

puts "benchmarks on #{urls.size} feeds"
puts "************************************"
benchmark do |t|
  t.report("feedzirra open uri") do
    iterations.times do
      urls.each do |url|
        xml = open(url, "User-Agent" => user_agent).read
        Feedzirra::Feed.parse(xml)
        tick.call
      end
    end
  end

  t.report("feedzirra fetch and parse") do
    iterations.times do
      Feedzirra::Feed.fetch_and_parse(urls, :on_success => lambda { |url, feed| tick.call })
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# Benchmark comparing how long feedzirra, rfeedparser and feed-normalizer take
# to parse the same sample feed (PaulDixExplainsNothing.xml) repeatedly.
require File.dirname(__FILE__) + '/../../lib/feedzirra.rb'
require 'rfeedparser'
require 'feed-normalizer'

require 'benchmark'
include Benchmark

iterations = 50
sample_path = File.dirname(__FILE__) + '/../sample_feeds/PaulDixExplainsNothing.xml'
xml = File.read(sample_path)

benchmark do |t|
  t.report("feedzirra") do
    iterations.times { Feedzirra::Feed.parse(xml) }
  end

  t.report("rfeedparser") do
    iterations.times { FeedParser.parse(xml) }
  end

  t.report("feed-normalizer") do
    iterations.times do
      # have to use the :force_parser option to make feed-normalizer parse an atom feed
      FeedNormalizer::FeedNormalizer.parse(xml, :force_parser => FeedNormalizer::SimpleRssParser)
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# Benchmark for fetching every url listed in successful_feed_urls.txt and then
# updating the resulting feeds with Feedzirra::Feed.update.
require 'rubygems'
require File.dirname(__FILE__) + '/../../lib/feedzirra.rb'

require 'benchmark'
include Benchmark

urls = File.readlines(File.dirname(__FILE__) + "/../sample_feeds/successful_feed_urls.txt")
puts "benchmarks on #{urls.size} feeds"
puts "************************************"
benchmark do |t|
  feeds = {}
  t.report("feedzirra fetch and parse") do
    feeds = Feedzirra::Feed.fetch_and_parse(urls,
      :on_success => lambda { |url, feed| $stdout.print '.'; $stdout.flush },
      :on_failure => lambda { |url, response_code, header, body| puts "#{response_code} ERROR on #{url}" })
  end

  # curb caches the DNS lookups for 60 seconds. To make things fair we have to
  # wait for the cache to expire.
  puts "sleeping to wait for dns cache to clear"
  # Flush after every dot so progress is visible while sleeping, matching the
  # other progress output in this script.
  65.times { $stdout.print('.'); $stdout.flush; sleep(1) }
  puts "done"

  updated_feeds = []
  t.report("feedzirra update") do
    # Integer values are skipped (presumably response codes stored for urls
    # that failed to fetch -- confirm against fetch_and_parse). Integer is used
    # instead of Fixnum because Fixnum is deprecated since Ruby 2.4 and removed
    # in Ruby 3.2; on older Rubies every Fixnum is also an Integer.
    fetched = feeds.values.reject { |f| f.is_a?(Integer) }
    updated_feeds = Feedzirra::Feed.update(fetched,
      :on_success => lambda { |feed| $stdout.print '.'; $stdout.flush },
      :on_failure => lambda { |feed, response_code, header, body| puts "#{response_code} ERROR on #{feed.feed_url}" })
  end

  # List the feeds that report new content after the update.
  updated_feeds.each do |feed|
    puts feed.feed_url if feed.updated?
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for Feedzirra::FeedEntryUtilities. The describe target names the module
# that the anonymous test class actually includes.
describe Feedzirra::FeedEntryUtilities do
  before(:each) do
    # Anonymous host class so the mixin can be exercised on its own.
    @klass = Class.new do
      include Feedzirra::FeedEntryUtilities
    end
  end

  describe "handling dates" do
    it "should parse an ISO 8601 formatted datetime into Time" do
      # NOTE(review): the input has a single-digit hour and a "-010:00" offset;
      # presumably a deliberate check of lenient parsing -- confirm.
      time = @klass.new.parse_datetime("2008-02-20T8:05:00-010:00")
      time.class.should == Time
      time.should == Time.parse_safely("Wed Feb 20 18:05:00 UTC 2008")
    end
  end

  describe "sanitizing" do
    before(:each) do
      @feed = Feedzirra::Feed.parse(sample_atom_feed)
      @entry = @feed.entries.first
    end

    it "should provide a sanitized title" do
      new_title = "<script>this is not safe</script>" + @entry.title
      @entry.title = new_title
      @entry.title.sanitize.should == Loofah.scrub_fragment(new_title, :prune).to_s
    end

    it "should sanitize content in place" do
      new_content = "<script>" + @entry.content
      # dup so that sanitize! mutates the entry's copy, not new_content.
      @entry.content = new_content.dup
      @entry.content.sanitize!.should == Loofah.scrub_fragment(new_content, :prune).to_s
      @entry.content.should == Loofah.scrub_fragment(new_content, :prune).to_s
    end

    it "should sanitize things in place" do
      @entry.title   += "<script>"
      @entry.author  += "<script>"
      @entry.content += "<script>"

      # Compute the expected values before the entry is sanitized in place.
      cleaned_title   = Loofah.scrub_fragment(@entry.title, :prune).to_s
      cleaned_author  = Loofah.scrub_fragment(@entry.author, :prune).to_s
      cleaned_content = Loofah.scrub_fragment(@entry.content, :prune).to_s

      @entry.sanitize!
      @entry.title.should   == cleaned_title
      @entry.author.should  == cleaned_author
      @entry.content.should == cleaned_content
    end
  end
end
|
@@ -0,0 +1,599 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
describe Feedzirra::Feed do
|
4
|
+
|
5
|
+
describe "#add_common_feed_element" do
|
6
|
+
before(:all) do
|
7
|
+
Feedzirra::Feed.add_common_feed_element("generator")
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should parse the added element out of Atom feeds" do
|
11
|
+
Feedzirra::Feed.parse(sample_wfw_feed).generator.should == "TypePad"
|
12
|
+
end
|
13
|
+
|
14
|
+
it "should parse the added element out of Atom Feedburner feeds" do
|
15
|
+
Feedzirra::Parser::Atom.new.should respond_to(:generator)
|
16
|
+
end
|
17
|
+
|
18
|
+
it "should parse the added element out of RSS feeds" do
|
19
|
+
Feedzirra::Parser::RSS.new.should respond_to(:generator)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
describe "#add_common_feed_entry_element" do
|
24
|
+
before(:all) do
|
25
|
+
Feedzirra::Feed.add_common_feed_entry_element("wfw:commentRss", :as => :comment_rss)
|
26
|
+
end
|
27
|
+
|
28
|
+
it "should parse the added element out of Atom feeds entries" do
|
29
|
+
Feedzirra::Feed.parse(sample_wfw_feed).entries.first.comment_rss.should == "this is the new val"
|
30
|
+
end
|
31
|
+
|
32
|
+
it "should parse the added element out of Atom Feedburner feeds entries" do
|
33
|
+
Feedzirra::Parser::AtomEntry.new.should respond_to(:comment_rss)
|
34
|
+
end
|
35
|
+
|
36
|
+
it "should parse the added element out of RSS feeds entries" do
|
37
|
+
Feedzirra::Parser::RSSEntry.new.should respond_to(:comment_rss)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
describe "#parse" do # many of these tests are redundant with the specific feed type tests, but I put them here for completeness
|
42
|
+
context "when there's an available parser" do
|
43
|
+
it "should parse an rdf feed" do
|
44
|
+
feed = Feedzirra::Feed.parse(sample_rdf_feed)
|
45
|
+
feed.title.should == "HREF Considered Harmful"
|
46
|
+
feed.entries.first.published.should == Time.parse_safely("Tue Sep 02 19:50:07 UTC 2008")
|
47
|
+
feed.entries.size.should == 10
|
48
|
+
end
|
49
|
+
|
50
|
+
it "should parse an rss feed" do
|
51
|
+
feed = Feedzirra::Feed.parse(sample_rss_feed)
|
52
|
+
feed.title.should == "Tender Lovemaking"
|
53
|
+
feed.entries.first.published.should == Time.parse_safely("Thu Dec 04 17:17:49 UTC 2008")
|
54
|
+
feed.entries.size.should == 10
|
55
|
+
end
|
56
|
+
|
57
|
+
it "should parse an atom feed" do
|
58
|
+
feed = Feedzirra::Feed.parse(sample_atom_feed)
|
59
|
+
feed.title.should == "Amazon Web Services Blog"
|
60
|
+
feed.entries.first.published.should == Time.parse_safely("Fri Jan 16 18:21:00 UTC 2009")
|
61
|
+
feed.entries.size.should == 10
|
62
|
+
end
|
63
|
+
|
64
|
+
it "should parse an feedburner atom feed" do
|
65
|
+
feed = Feedzirra::Feed.parse(sample_feedburner_atom_feed)
|
66
|
+
feed.title.should == "Paul Dix Explains Nothing"
|
67
|
+
feed.entries.first.published.should == Time.parse_safely("Thu Jan 22 15:50:22 UTC 2009")
|
68
|
+
feed.entries.size.should == 5
|
69
|
+
end
|
70
|
+
|
71
|
+
it "should parse an itunes feed as a standard RSS feed" do
|
72
|
+
feed = Feedzirra::Feed.parse(sample_itunes_feed)
|
73
|
+
feed.title.should == "All About Everything"
|
74
|
+
feed.entries.first.published.should == Time.parse_safely("Wed, 15 Jun 2005 19:00:00 GMT")
|
75
|
+
|
76
|
+
# Since the commit 621957879, iTunes feeds will be parsed as standard RSS, so this
|
77
|
+
# entry should now not have a method for itunes_author.
|
78
|
+
feed.entries.first.should_not respond_to(:itunes_author)
|
79
|
+
feed.entries.size.should == 3
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
context "when there's no available parser" do
|
84
|
+
it "raises Feedzirra::NoParserAvailable" do
|
85
|
+
proc {
|
86
|
+
Feedzirra::Feed.parse("I'm an invalid feed")
|
87
|
+
}.should raise_error(Feedzirra::NoParserAvailable)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
it "should parse an feedburner rss feed" do
|
92
|
+
feed = Feedzirra::Feed.parse(sample_rss_feed_burner_feed)
|
93
|
+
feed.title.should == "TechCrunch"
|
94
|
+
feed.entries.first.published.should == Time.parse_safely("Wed Nov 02 17:25:27 UTC 2011")
|
95
|
+
feed.entries.size.should == 20
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
describe "#determine_feed_parser_for_xml" do
|
100
|
+
# The example description previously misspelled "class" as "calss".
it 'should return the Feedzirra::Parser::GoogleDocsAtom class for a Google Docs atom feed' do
  Feedzirra::Feed.determine_feed_parser_for_xml(sample_google_docs_list_feed).should == Feedzirra::Parser::GoogleDocsAtom
end
|
103
|
+
|
104
|
+
it "should return the Feedzirra::Parser::Atom class for an atom feed" do
|
105
|
+
Feedzirra::Feed.determine_feed_parser_for_xml(sample_atom_feed).should == Feedzirra::Parser::Atom
|
106
|
+
end
|
107
|
+
|
108
|
+
it "should return the Feedzirra::Parser::AtomFeedBurner class for an atom feedburner feed" do
|
109
|
+
Feedzirra::Feed.determine_feed_parser_for_xml(sample_feedburner_atom_feed).should == Feedzirra::Parser::AtomFeedBurner
|
110
|
+
end
|
111
|
+
|
112
|
+
it "should return the Feedzirra::Parser::RSS class for an rdf/rss 1.0 feed" do
|
113
|
+
Feedzirra::Feed.determine_feed_parser_for_xml(sample_rdf_feed).should == Feedzirra::Parser::RSS
|
114
|
+
end
|
115
|
+
|
116
|
+
it "should return the Feedzirra::Parser::RSSFeedBurner class for an rss feedburner feed" do
|
117
|
+
Feedzirra::Feed.determine_feed_parser_for_xml(sample_rss_feed_burner_feed).should == Feedzirra::Parser::RSSFeedBurner
|
118
|
+
end
|
119
|
+
|
120
|
+
it "should return the Feedzirra::Parser::RSS object for an rss 2.0 feed" do
|
121
|
+
Feedzirra::Feed.determine_feed_parser_for_xml(sample_rss_feed).should == Feedzirra::Parser::RSS
|
122
|
+
end
|
123
|
+
|
124
|
+
it "should return a Feedzirra::Parser::RSS object for an itunes feed" do
|
125
|
+
Feedzirra::Feed.determine_feed_parser_for_xml(sample_itunes_feed).should == Feedzirra::Parser::RSS
|
126
|
+
end
|
127
|
+
|
128
|
+
end
|
129
|
+
|
130
|
+
describe "when adding feed types" do
|
131
|
+
it "should prioritize added types over the built in ones" do
|
132
|
+
feed_text = "Atom asdf"
|
133
|
+
Feedzirra::Parser::Atom.stub!(:able_to_parse?).and_return(true)
|
134
|
+
new_feed_type = Class.new do
|
135
|
+
def self.able_to_parse?(val)
|
136
|
+
true
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
new_feed_type.should be_able_to_parse(feed_text)
|
141
|
+
Feedzirra::Feed.add_feed_class(new_feed_type)
|
142
|
+
Feedzirra::Feed.determine_feed_parser_for_xml(feed_text).should == new_feed_type
|
143
|
+
|
144
|
+
# this is a hack so that this doesn't break the rest of the tests
|
145
|
+
Feedzirra::Feed.feed_classes.reject! {|o| o == new_feed_type }
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
describe '#etag_from_header' do
|
150
|
+
before(:each) do
|
151
|
+
@header = "HTTP/1.0 200 OK\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\nETag: ziEyTl4q9GH04BR4jgkImd0GvSE\r\nP3P: CP=\"ALL DSP COR NID CUR OUR NOR\"\r\nConnection: close\r\nContent-Type: text/xml;charset=utf-8\r\n\r\n"
|
152
|
+
end
|
153
|
+
|
154
|
+
it "should return the etag from the header if it exists" do
|
155
|
+
Feedzirra::Feed.etag_from_header(@header).should == "ziEyTl4q9GH04BR4jgkImd0GvSE"
|
156
|
+
end
|
157
|
+
|
158
|
+
it "should return nil if there is no etag in the header" do
|
159
|
+
Feedzirra::Feed.etag_from_header("foo").should be_nil
|
160
|
+
end
|
161
|
+
|
162
|
+
end
|
163
|
+
|
164
|
+
describe '#last_modified_from_header' do
|
165
|
+
before(:each) do
|
166
|
+
@header = "HTTP/1.0 200 OK\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\nETag: ziEyTl4q9GH04BR4jgkImd0GvSE\r\nP3P: CP=\"ALL DSP COR NID CUR OUR NOR\"\r\nConnection: close\r\nContent-Type: text/xml;charset=utf-8\r\n\r\n"
|
167
|
+
end
|
168
|
+
|
169
|
+
it "should return the last modified date from the header if it exists" do
|
170
|
+
Feedzirra::Feed.last_modified_from_header(@header).should == Time.parse_safely("Wed, 28 Jan 2009 04:10:32 GMT")
|
171
|
+
end
|
172
|
+
|
173
|
+
it "should return nil if there is no last modified date in the header" do
|
174
|
+
Feedzirra::Feed.last_modified_from_header("foo").should be_nil
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
178
|
+
describe "fetching feeds" do
|
179
|
+
before(:each) do
|
180
|
+
@paul_feed = { :xml => load_sample("PaulDixExplainsNothing.xml"), :url => "http://feeds.feedburner.com/PaulDixExplainsNothing" }
|
181
|
+
@trotter_feed = { :xml => load_sample("TrotterCashionHome.xml"), :url => "http://feeds2.feedburner.com/trottercashion" }
|
182
|
+
end
|
183
|
+
|
184
|
+
describe "#fetch_raw" do
|
185
|
+
before(:each) do
|
186
|
+
@cmock = stub('cmock', :header_str => '', :body_str => @paul_feed[:xml] )
|
187
|
+
@multi = stub('curl_multi', :add => true, :perform => true)
|
188
|
+
@curl_easy = stub('curl_easy')
|
189
|
+
@curl = stub('curl', :headers => {}, :follow_location= => true, :on_failure => true)
|
190
|
+
@curl.stub!(:on_success).and_yield(@cmock)
|
191
|
+
|
192
|
+
Curl::Multi.stub!(:new).and_return(@multi)
|
193
|
+
Curl::Easy.stub!(:new).and_yield(@curl).and_return(@curl_easy)
|
194
|
+
end
|
195
|
+
|
196
|
+
it "should set user agent if it's passed as an option" do
|
197
|
+
Feedzirra::Feed.fetch_raw(@paul_feed[:url], :user_agent => 'Custom Useragent')
|
198
|
+
@curl.headers['User-Agent'].should == 'Custom Useragent'
|
199
|
+
end
|
200
|
+
|
201
|
+
it "should set user agent to default if it's not passed as an option" do
|
202
|
+
Feedzirra::Feed.fetch_raw(@paul_feed[:url])
|
203
|
+
@curl.headers['User-Agent'].should == Feedzirra::Feed::USER_AGENT
|
204
|
+
end
|
205
|
+
|
206
|
+
it "should set if modified since as an option if passed" do
|
207
|
+
Feedzirra::Feed.fetch_raw(@paul_feed[:url], :if_modified_since => Time.parse_safely("Wed, 28 Jan 2009 04:10:32 GMT"))
|
208
|
+
@curl.headers["If-Modified-Since"].should == 'Wed, 28 Jan 2009 04:10:32 GMT'
|
209
|
+
end
|
210
|
+
|
211
|
+
it "should set if none match as an option if passed" do
|
212
|
+
Feedzirra::Feed.fetch_raw(@paul_feed[:url], :if_none_match => 'ziEyTl4q9GH04BR4jgkImd0GvSE')
|
213
|
+
@curl.headers["If-None-Match"].should == 'ziEyTl4q9GH04BR4jgkImd0GvSE'
|
214
|
+
end
|
215
|
+
|
216
|
+
it 'should set userpwd for http basic authentication if :http_authentication is passed' do
|
217
|
+
@curl.should_receive(:userpwd=).with('username:password')
|
218
|
+
Feedzirra::Feed.fetch_raw(@paul_feed[:url], :http_authentication => ['username', 'password'])
|
219
|
+
end
|
220
|
+
|
221
|
+
it 'should set accepted encodings' do
|
222
|
+
Feedzirra::Feed.fetch_raw(@paul_feed[:url], :compress => true)
|
223
|
+
@curl.headers["Accept-encoding"].should == 'gzip, deflate'
|
224
|
+
end
|
225
|
+
|
226
|
+
it "should return raw xml" do
|
227
|
+
Feedzirra::Feed.fetch_raw(@paul_feed[:url]).should =~ /^#{Regexp.escape('<?xml version="1.0" encoding="UTF-8"?>')}/
|
228
|
+
end
|
229
|
+
|
230
|
+
it "should take multiple feed urls and return a hash of urls and response xml" do
|
231
|
+
multi = stub('curl_multi', :add => true, :perform => true)
|
232
|
+
Curl::Multi.stub!(:new).and_return(multi)
|
233
|
+
|
234
|
+
paul_response = stub('paul_response', :header_str => '', :body_str => @paul_feed[:xml] )
|
235
|
+
trotter_response = stub('trotter_response', :header_str => '', :body_str => @trotter_feed[:xml] )
|
236
|
+
|
237
|
+
paul_curl = stub('paul_curl', :headers => {}, :follow_location= => true, :on_failure => true)
|
238
|
+
paul_curl.stub!(:on_success).and_yield(paul_response)
|
239
|
+
|
240
|
+
trotter_curl = stub('trotter_curl', :headers => {}, :follow_location= => true, :on_failure => true)
|
241
|
+
trotter_curl.stub!(:on_success).and_yield(trotter_response)
|
242
|
+
|
243
|
+
Curl::Easy.should_receive(:new).with(@paul_feed[:url]).ordered.and_yield(paul_curl)
|
244
|
+
Curl::Easy.should_receive(:new).with(@trotter_feed[:url]).ordered.and_yield(trotter_curl)
|
245
|
+
|
246
|
+
results = Feedzirra::Feed.fetch_raw([@paul_feed[:url], @trotter_feed[:url]])
|
247
|
+
results.keys.should include(@paul_feed[:url])
|
248
|
+
results.keys.should include(@trotter_feed[:url])
|
249
|
+
results[@paul_feed[:url]].should =~ /Paul Dix/
|
250
|
+
results[@trotter_feed[:url]].should =~ /Trotter Cashion/
|
251
|
+
end
|
252
|
+
|
253
|
+
it "should always return a hash when passed an array" do
|
254
|
+
results = Feedzirra::Feed.fetch_raw([@paul_feed[:url]])
|
255
|
+
results.class.should == Hash
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
describe "#add_url_to_multi" do
|
260
|
+
before(:each) do
|
261
|
+
allow_message_expectations_on_nil
|
262
|
+
@multi = Curl::Multi.get([@paul_feed[:url]], {:follow_location => true}, {:pipeline => true})
|
263
|
+
@multi.stub!(:add)
|
264
|
+
@easy_curl = Curl::Easy.new(@paul_feed[:url])
|
265
|
+
|
266
|
+
Curl::Easy.should_receive(:new).and_yield(@easy_curl)
|
267
|
+
end
|
268
|
+
|
269
|
+
it "should set user agent if it's passed as an option" do
|
270
|
+
Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, :user_agent => 'My cool application')
|
271
|
+
@easy_curl.headers["User-Agent"].should == 'My cool application'
|
272
|
+
end
|
273
|
+
|
274
|
+
it "should set user agent to default if it's not passed as an option" do
|
275
|
+
Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
276
|
+
@easy_curl.headers["User-Agent"].should == Feedzirra::Feed::USER_AGENT
|
277
|
+
end
|
278
|
+
|
279
|
+
it "should set if modified since as an option if passed" do
|
280
|
+
Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, :if_modified_since => Time.parse_safely("Jan 25 2009 04:10:32 GMT"))
|
281
|
+
@easy_curl.headers["If-Modified-Since"].should == 'Sun, 25 Jan 2009 04:10:32 GMT'
|
282
|
+
end
|
283
|
+
|
284
|
+
it 'should set follow location to true' do
|
285
|
+
@easy_curl.should_receive(:follow_location=).with(true)
|
286
|
+
Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
287
|
+
end
|
288
|
+
|
289
|
+
it 'should set userpwd for http basic authentication if :http_authentication is passed' do
|
290
|
+
Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, :http_authentication => ['myusername', 'mypassword'])
|
291
|
+
@easy_curl.userpwd.should == 'myusername:mypassword'
|
292
|
+
end
|
293
|
+
|
294
|
+
it 'should set accepted encodings' do
|
295
|
+
Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {:compress => true})
|
296
|
+
@easy_curl.headers["Accept-encoding"].should == 'gzip, deflate'
|
297
|
+
end
|
298
|
+
|
299
|
+
it "should set if_none_match as an option if passed" do
|
300
|
+
Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, :if_none_match => 'ziEyTl4q9GH04BR4jgkImd0GvSE')
|
301
|
+
@easy_curl.headers["If-None-Match"].should == 'ziEyTl4q9GH04BR4jgkImd0GvSE'
|
302
|
+
end
|
303
|
+
|
304
|
+
# Examples for the on_success callback that add_url_to_multi installs on the
# easy handle. Decoding, parser detection, parsing and header extraction are
# stubbed in the before block so each example checks a single interaction.
describe 'on success' do
  before(:each) do
    feed_setters = { :feed_url= => true, :etag= => true, :last_modified= => true }
    @feed = mock('feed', feed_setters)

    Feedzirra::Feed.stub!(:decode_content).and_return(@paul_feed[:xml])
    Feedzirra::Feed.stub!(:determine_feed_parser_for_xml).and_return(Feedzirra::Parser::AtomFeedBurner)
    Feedzirra::Parser::AtomFeedBurner.stub!(:parse).and_return(@feed)
    Feedzirra::Feed.stub!(:etag_from_header).and_return('ziEyTl4q9GH04BR4jgkImd0GvSE')
    Feedzirra::Feed.stub!(:last_modified_from_header).and_return('Wed, 28 Jan 2009 04:10:32 GMT')
  end

  # Registers the sample URL on the multi handle, then invokes the success
  # callback by hand, passing the easy handle as its argument.
  def add_url_and_succeed(responses = {}, options = {})
    Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], responses, options)
    @easy_curl.on_success.call(@easy_curl)
  end

  it 'should decode the response body' do
    Feedzirra::Feed.should_receive(:decode_content).with(@easy_curl).and_return(@paul_feed[:xml])
    add_url_and_succeed
  end

  it 'should determine the xml parser class' do
    Feedzirra::Feed.should_receive(:determine_feed_parser_for_xml).
      with(@paul_feed[:xml]).
      and_return(Feedzirra::Parser::AtomFeedBurner)
    add_url_and_succeed
  end

  it 'should parse the xml' do
    Feedzirra::Parser::AtomFeedBurner.should_receive(:parse).
      with(@paul_feed[:xml], an_instance_of(Proc)).
      and_return(@feed)
    add_url_and_succeed
  end

  describe 'when a compatible xml parser class is found' do
    it 'should set the last effective url to the feed url' do
      effective_url = @paul_feed[:url]
      @easy_curl.should_receive(:last_effective_url).and_return(effective_url)
      @feed.should_receive(:feed_url=).with(effective_url)
      add_url_and_succeed
    end

    it 'should set the etags on the feed' do
      @feed.should_receive(:etag=).with('ziEyTl4q9GH04BR4jgkImd0GvSE')
      add_url_and_succeed
    end

    it 'should set the last modified on the feed' do
      @feed.should_receive(:last_modified=).with('Wed, 28 Jan 2009 04:10:32 GMT')
      add_url_and_succeed
    end

    it 'should add the feed to the responses' do
      collected = {}
      add_url_and_succeed(collected)

      collected.length.should == 1
      collected['http://feeds.feedburner.com/PaulDixExplainsNothing'].should == @feed
    end

    it 'should call proc if :on_success option is passed' do
      on_success = lambda { |url, feed| }
      on_success.should_receive(:call).with(@paul_feed[:url], @feed)
      add_url_and_succeed({}, { :on_success => on_success })
    end
  end

  describe 'when no compatible xml parser class is found' do
    # Pending: no body has been written for this example yet.
    it 'should raise a NoParserAvailable exception'
  end
end
|
373
|
+
|
374
|
+
# Examples for the on_failure callback that add_url_to_multi installs on the
# easy handle. The handle is stubbed to look like a 404 response.
describe 'on failure' do
  before(:each) do
    @headers = "HTTP/1.0 404 Not Found\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\n"
    @body = 'Page could not be found.'

    @easy_curl.stub!(:response_code).and_return(404)
    @easy_curl.stub!(:header_str).and_return(@headers)
    @easy_curl.stub!(:body_str).and_return(@body)
  end

  it 'should call proc if :on_failure option is passed' do
    # The failure callback is expected to receive four arguments (see the
    # expectation below), so the lambda declares all four instead of the
    # two-argument success signature.
    failure = lambda { |url, response_code, response_header, response_body| }
    failure.should_receive(:call).with(@paul_feed[:url], 404, @headers, @body)
    Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, { :on_failure => failure })
    @easy_curl.on_failure.call(@easy_curl)
  end

  it 'should return the http code in the responses' do
    responses = {}
    Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], responses, {})
    @easy_curl.on_failure.call(@easy_curl)

    # On failure the responses hash maps the requested URL to the status code.
    responses.length.should == 1
    responses[@paul_feed[:url]].should == 404
  end
end
|
400
|
+
end
|
401
|
+
|
402
|
+
# Examples for Feedzirra::Feed.add_feed_to_multi, which is given an already
# parsed feed object rather than a bare URL.
describe "#add_feed_to_multi" do
  before(:each) do
    # NOTE(review): allow_message_expectations_on_nil suggests @multi may be nil
    # after Curl::Multi.get -- confirm what that call returns and whether it
    # performs a real request during setup.
    allow_message_expectations_on_nil
    @multi = Curl::Multi.get([@paul_feed[:url]], { :follow_location => true }, { :pipeline => true })
    @multi.stub!(:add)
    @easy_curl = Curl::Easy.new(@paul_feed[:url])
    @feed = Feedzirra::Feed.parse(sample_feedburner_atom_feed)

    # Every example expects Curl::Easy.new to be called and yields the handle
    # built above, so its configuration can be inspected afterwards.
    Curl::Easy.should_receive(:new).and_yield(@easy_curl)
  end

  # Adds @feed to the multi handle with the given request options and
  # responses hash.
  def add_feed(options = {}, responses = {})
    Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], responses, options)
  end

  # Invokes the success callback stored on the easy handle.
  def run_success_callback
    @easy_curl.on_success.call(@easy_curl)
  end

  # Invokes the failure callback stored on the easy handle.
  def run_failure_callback
    @easy_curl.on_failure.call(@easy_curl)
  end

  it "should set user agent if it's passed as an option" do
    add_feed({ :user_agent => 'My cool application' })
    @easy_curl.headers["User-Agent"].should == 'My cool application'
  end

  it "should set user agent to default if it's not passed as an option" do
    add_feed
    @easy_curl.headers["User-Agent"].should == Feedzirra::Feed::USER_AGENT
  end

  it "should set if modified since as an option if passed" do
    modified_time = Time.parse_safely("Wed, 28 Jan 2009 04:10:32 GMT")
    add_feed({ :if_modified_since => modified_time })
    # Sanity check on the fixture: the option is newer than the feed's own
    # last_modified value.
    modified_time.should be > @feed.last_modified

    @easy_curl.headers["If-Modified-Since"].should == modified_time
  end

  it 'should set follow location to true' do
    @easy_curl.should_receive(:follow_location=).with(true)
    add_feed
  end

  it 'should set userpwd for http basic authentication if :http_authentication is passed' do
    credentials = %w[myusername mypassword]
    add_feed({ :http_authentication => credentials })
    @easy_curl.userpwd.should == 'myusername:mypassword'
  end

  # Here the value comes from the feed's etag attribute, not a request option.
  it "should set if_none_match as an option if passed" do
    etag = 'ziEyTl4q9GH04BR4jgkImd0GvSE'
    @feed.etag = etag
    add_feed
    @easy_curl.headers["If-None-Match"].should == etag
  end

  describe 'on success' do
    before(:each) do
      # @new_feed stands in for the freshly parsed copy handed back by the
      # stubbed parser; @feed is the existing feed being updated.
      @new_feed = @feed.clone
      @feed.stub!(:update_from_feed)
      Feedzirra::Feed.stub!(:decode_content).and_return(@paul_feed[:xml])
      Feedzirra::Feed.stub!(:determine_feed_parser_for_xml).and_return(Feedzirra::Parser::AtomFeedBurner)
      Feedzirra::Parser::AtomFeedBurner.stub!(:parse).and_return(@new_feed)
      Feedzirra::Feed.stub!(:etag_from_header).and_return('ziEyTl4q9GH04BR4jgkImd0GvSE')
      Feedzirra::Feed.stub!(:last_modified_from_header).and_return('Wed, 28 Jan 2009 04:10:32 GMT')
    end

    # Pending: no body has been written for this example yet.
    it 'should process the next feed in the queue'

    it 'should parse the updated feed' do
      Feedzirra::Parser::AtomFeedBurner.should_receive(:parse).and_return(@new_feed)
      add_feed
      run_success_callback
    end

    it 'should set the last effective url to the feed url' do
      effective_url = @paul_feed[:url]
      @easy_curl.should_receive(:last_effective_url).and_return(effective_url)
      @new_feed.should_receive(:feed_url=).with(effective_url)
      add_feed
      run_success_callback
    end

    it 'should set the etags on the feed' do
      @new_feed.should_receive(:etag=).with('ziEyTl4q9GH04BR4jgkImd0GvSE')
      add_feed
      run_success_callback
    end

    it 'should set the last modified on the feed' do
      @new_feed.should_receive(:last_modified=).with('Wed, 28 Jan 2009 04:10:32 GMT')
      add_feed
      run_success_callback
    end

    it 'should add the feed to the responses' do
      collected = {}
      add_feed({}, collected)
      run_success_callback

      collected.length.should == 1
      collected['http://feeds.feedburner.com/PaulDixExplainsNothing'].should == @feed
    end

    it 'should call proc if :on_success option is passed' do
      on_success = lambda { |feed| }
      on_success.should_receive(:call).with(@feed)
      add_feed({ :on_success => on_success })
      run_success_callback
    end

    it 'should call update from feed on the old feed with the updated feed' do
      @feed.should_receive(:update_from_feed).with(@new_feed)
      add_feed
      run_success_callback
    end
  end

  describe 'on failure' do
    before(:each) do
      @headers = "HTTP/1.0 404 Not Found\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\n"
      @body = 'Page could not be found.'

      @easy_curl.stub!(:response_code).and_return(404)
      @easy_curl.stub!(:header_str).and_return(@headers)
      @easy_curl.stub!(:body_str).and_return(@body)
    end

    # A 304 arrives through the failure callback but is expected to be reported
    # to the caller's :on_success proc with the existing feed.
    it 'should call on success callback if the response code is 304' do
      on_success = lambda { |feed| }
      on_success.should_receive(:call).with(@feed)
      @easy_curl.should_receive(:response_code).and_return(304)
      add_feed({ :on_success => on_success })
      run_failure_callback
    end

    it 'should return the http code in the responses' do
      collected = {}
      add_feed({}, collected)
      run_failure_callback

      collected.length.should == 1
      collected['http://www.pauldix.net/'].should == 404
    end
  end
end
|
536
|
+
|
537
|
+
# Examples for Feedzirra::Feed.fetch_and_parse. Most are still pending (an
# `it` without a block); only option pass-through is exercised.
describe "#fetch_and_parse" do
  it 'should initiate the fetching and parsing using multicurl'
  it "should pass any request options through to add_url_to_multi"
  it 'should slice the feeds into groups of thirty for processing'
  it "should return a feed object if a single feed is passed in"
  it "should return an array of feed objects if multiple feeds are passed in"

  it "should set if modified since as an option if passed" do
    modified_time = Time.parse_safely("Wed, 28 Jan 2009 04:10:32 GMT")
    # any_number_of_times means this only constrains the arguments of calls
    # that do happen; it does not require add_url_to_multi to be called.
    Feedzirra::Feed.should_receive(:add_url_to_multi).with(anything, anything, anything, anything, {:if_modified_since => modified_time}).any_number_of_times

    # NOTE(review): sample_feedburner_atom_feed looks like feed XML rather than
    # a URL -- confirm whether a URL (e.g. @paul_feed[:url]) was intended.
    Feedzirra::Feed.fetch_and_parse(sample_feedburner_atom_feed, {:if_modified_since => modified_time})
  end

end
|
552
|
+
|
553
|
+
# Examples for Feedzirra::Feed.decode_content, driven by a mock easy handle
# whose body is '<xml></xml>' and whose header_str is stubbed per example.
describe "#decode_content" do
  before(:each) do
    @curl_easy = mock('curl_easy', :body_str => '<xml></xml>')
  end

  it 'should decode the response body using gzip if the Content-Encoding: is gzip' do
    @curl_easy.stub!(:header_str).and_return('Content-Encoding: gzip')
    # The same mock plays both the StringIO and the GzipReader so no real
    # gzip data is required.
    string_io = mock('stringio', :read => @curl_easy.body_str, :close => true)
    StringIO.should_receive(:new).and_return(string_io)
    Zlib::GzipReader.should_receive(:new).with(string_io).and_return(string_io)
    Feedzirra::Feed.decode_content(@curl_easy)
  end

  it 'should decode the response body using gzip if the Content-Encoding: is gzip even when the case is wrong' do
    @curl_easy.stub!(:header_str).and_return('content-encoding: gzip')
    string_io = mock('stringio', :read => @curl_easy.body_str, :close => true)
    StringIO.should_receive(:new).and_return(string_io)
    Zlib::GzipReader.should_receive(:new).with(string_io).and_return(string_io)
    Feedzirra::Feed.decode_content(@curl_easy)
  end

  it 'should deflate the response body using inflate if the Content-Encoding: is deflate' do
    @curl_easy.stub!(:header_str).and_return('Content-Encoding: deflate')
    Zlib::Inflate.should_receive(:inflate).with(@curl_easy.body_str)
    Feedzirra::Feed.decode_content(@curl_easy)
  end

  it 'should deflate the response body using inflate if the Content-Encoding: is deflate even if the case is wrong' do
    @curl_easy.stub!(:header_str).and_return('content-encoding: deflate')
    Zlib::Inflate.should_receive(:inflate).with(@curl_easy.body_str)
    Feedzirra::Feed.decode_content(@curl_easy)
  end

  it 'should return the response body if it is not encoded' do
    # No Content-Encoding header at all: the body should come back untouched.
    @curl_easy.stub!(:header_str).and_return('')
    Feedzirra::Feed.decode_content(@curl_easy).should == '<xml></xml>'
  end
end
|
591
|
+
|
592
|
+
# Placeholder group for Feedzirra::Feed.update: every example is pending
# (an `it` without a block).
describe "#update" do
  it 'should perform the updating using multicurl'
  it "should pass any request options through to add_feed_to_multi"
  it "should return a feed object if a single feed is passed in"
  it "should return an array of feed objects if multiple feeds are passed in"
end
|
598
|
+
end
|
599
|
+
end
|