rickerbh-feedzirra 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README.textile +176 -0
- data/Rakefile +14 -0
- data/lib/core_ext/date.rb +21 -0
- data/lib/feedzirra/atom.rb +14 -0
- data/lib/feedzirra/atom_entry.rb +14 -0
- data/lib/feedzirra/atom_feed_burner.rb +14 -0
- data/lib/feedzirra/atom_feed_burner_entry.rb +14 -0
- data/lib/feedzirra/feed.rb +173 -0
- data/lib/feedzirra/feed_entry_utilities.rb +43 -0
- data/lib/feedzirra/feed_utilities.rb +71 -0
- data/lib/feedzirra/rdf.rb +15 -0
- data/lib/feedzirra/rdf_entry.rb +12 -0
- data/lib/feedzirra/rss.rb +15 -0
- data/lib/feedzirra/rss_entry.rb +16 -0
- data/lib/feedzirra.rb +34 -0
- data/spec/feedzirra/atom_entry_spec.rb +37 -0
- data/spec/feedzirra/atom_feed_burner_entry_spec.rb +42 -0
- data/spec/feedzirra/atom_feed_burner_spec.rb +39 -0
- data/spec/feedzirra/atom_spec.rb +35 -0
- data/spec/feedzirra/feed_entry_utilities_spec.rb +52 -0
- data/spec/feedzirra/feed_spec.rb +270 -0
- data/spec/feedzirra/feed_utilities_spec.rb +149 -0
- data/spec/feedzirra/rdf_entry_spec.rb +33 -0
- data/spec/feedzirra/rdf_spec.rb +37 -0
- data/spec/feedzirra/rss_entry_spec.rb +37 -0
- data/spec/feedzirra/rss_spec.rb +41 -0
- data/spec/spec.opts +2 -0
- data/spec/spec_helper.rb +50 -0
- metadata +139 -0
@@ -0,0 +1,43 @@
|
|
1
|
+
module Feedzirra
  # Behaviour shared by parsed feed entries: parsing of the publication date
  # and HTML sanitizing of the title, author and content fields.
  #
  # The including class is expected to store those fields in @title, @author
  # and @content, and to assign the raw date string through #published=.
  module FeedEntryUtilities
    # Mixed into individual String values so that a field can sanitize itself.
    module Sanitize
      # Replaces the receiver's contents with its sanitized form, in place.
      # Returns the receiver.
      def sanitize!
        self.replace(sanitize)
      end

      # Returns the result of Dryopteris.sanitize for the receiver, leaving
      # the receiver itself untouched.
      def sanitize
        Dryopteris.sanitize(self)
      end
    end

    # The value produced by #parse_datetime for the last assigned date string.
    attr_reader :published

    # Parses +string+ with DateTime.parse and converts the result with
    # feed_utils_to_gm_time (a core extension defined outside this module;
    # presumably it yields a UTC Time - confirm in core_ext/date).
    # Raises whatever DateTime.parse raises for unparseable input.
    def parse_datetime(string)
      DateTime.parse(string).feed_utils_to_gm_time
    end

    # Stores the parsed form of the raw date string +val+.
    def published=(val)
      @published = parse_datetime(val)
    end

    # Returns the content extended with Sanitize, or nil when the entry has
    # no content. nil is never extended: extending nil would mix Sanitize
    # into NilClass for the whole process.
    def content
      @content.extend(Sanitize) unless @content.nil?
    end

    # Returns the title extended with Sanitize, or nil when there is none.
    def title
      @title.extend(Sanitize) unless @title.nil?
    end

    # Returns the author extended with Sanitize, or nil when there is none.
    def author
      @author.extend(Sanitize) unless @author.nil?
    end

    # Sanitizes title, author and content in place. Fields the entry does not
    # have are skipped, so an entry without (for example) an author no longer
    # raises NoMethodError.
    def sanitize!
      self.title.sanitize! if self.title
      self.author.sanitize! if self.author
      self.content.sanitize! if self.content
    end

    alias_method :last_modified, :published
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
module Feedzirra
  # Behaviour shared by parsed feed objects: tracking of new entries and of
  # attribute changes when a feed is refreshed from a newer copy of itself.
  #
  # The including class must provide #entries (an Array of entry objects that
  # respond to #url and #published) plus readers and writers for title,
  # feed_url and url.
  module FeedUtilities
    # Attributes copied from the freshly fetched feed by #update_from_feed.
    UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified)

    attr_writer :new_entries, :updated, :last_modified
    attr_accessor :etag

    # Returns the explicitly assigned last-modified value or, failing that,
    # the most recent #published value among the entries (memoized).
    # Returns nil when no entry has a published value.
    def last_modified
      @last_modified ||= entries.map { |entry| entry.published }.compact.max
    end

    # Truthy once #update_from_feed has changed at least one attribute (or
    # after the flag was assigned through #updated=).
    def updated?
      @updated
    end

    # Entries collected by #update_from_feed; an empty Array by default.
    def new_entries
      @new_entries ||= []
    end

    # True when #new_entries holds at least one entry.
    def has_new_entries?
      !new_entries.empty?
    end

    # Merges a freshly fetched copy of this feed into the receiver.
    #
    # Entries of +feed+ not yet known to the receiver are appended to
    # #new_entries and prepended to #entries. Only the entries found by this
    # call are prepended, so calling the method repeatedly does not insert
    # earlier batches a second time.
    #
    # Every attribute in UPDATABLE_ATTRIBUTES is then synchronised. All of
    # them are processed before the flag is evaluated; a short-circuiting
    # any? would stop at the first changed attribute and leave the remaining
    # ones stale.
    def update_from_feed(feed)
      fresh_entries = find_new_entries_for(feed)
      self.new_entries += fresh_entries
      self.entries.unshift(*fresh_entries)

      changes = UPDATABLE_ATTRIBUTES.map { |name| update_attribute(feed, name) }
      updated! if changes.any?
    end

    # Copies attribute +name+ from +feed+ when it differs from the receiver's
    # value. Returns the newly assigned value, or nil when nothing changed.
    # NOTE(review): a change to nil/false also returns a falsy value, so it is
    # not counted as an update by #update_from_feed.
    def update_attribute(feed, name)
      old_value, new_value = send(name), feed.send(name)
      send("#{name}=", new_value) if old_value != new_value
    end

    # Calls sanitize! on every entry.
    def sanitize_entries!
      entries.each { |entry| entry.sanitize! }
    end

    private

    # Marks the receiver as updated.
    def updated!
      @updated = true
    end

    # Returns the entries of +feed+ that are newer than the receiver's newest
    # entry.
    #
    # Not every feed carries a published date, but the parsed entries are
    # expected to be ordered newest first. So the freshly parsed entries are
    # collected until one is reached whose url equals the url of the
    # receiver's newest entry. When the receiver has no entries yet, every
    # entry of +feed+ is new.
    def find_new_entries_for(feed)
      latest_entry = self.entries.first
      return feed.entries.dup if latest_entry.nil?

      found_new_entries = []
      feed.entries.each do |entry|
        break if entry.url == latest_entry.url
        found_new_entries << entry
      end
      found_new_entries
    end

    # True when the receiver already holds an entry with the url of
    # +test_entry+.
    def existing_entry?(test_entry)
      entries.any? { |entry| entry.url == test_entry.url }
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Feedzirra
  # SAXMachine-based parser for RDF documents. The title and link of the
  # document are exposed as #title and #url, and every <item> element is
  # parsed into an RDFEntry available through #entries.
  class RDF
    include SAXMachine
    include FeedUtilities

    element  :title
    element  :link, :as => :url
    elements :item, :as => :entries, :class => RDFEntry

    # Address the feed was fetched from; assigned from outside the parser.
    attr_accessor :feed_url

    # Reports whether +xml+ looks like a document this parser handles.
    # Returns the offset of the first matching marker, or false when none of
    # the markers occurs in +xml+.
    def self.able_to_parse?(xml)
      marker_offset = (xml =~ /(rdf\:RDF)|(#{Regexp.escape("http://purl.org/rss/1.0")})|(rss version\=\"0\.9.?\")/)
      marker_offset ? marker_offset : false
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module Feedzirra
  # SAXMachine mapping for a single <item> of an RDF document. Each
  # declaration maps an XML element to an attribute of the entry; the
  # declarations are kept in their original order.
  class RDFEntry
    include SAXMachine
    include FeedEntryUtilities

    element :title
    element :link,              :as => :url
    element :"dc:creator",      :as => :author
    element :"content:encoded", :as => :content
    element :description,       :as => :summary
    # Assigned through FeedEntryUtilities#published=, which parses the date.
    element :"dc:date",         :as => :published
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Feedzirra
  # SAXMachine-based parser for RSS documents. The title and link of the
  # document are exposed as #title and #url, and every <item> element is
  # parsed into an RSSEntry available through #entries.
  class RSS
    include SAXMachine
    include FeedUtilities
    element :title
    element :link, :as => :url
    elements :item, :as => :entries, :class => RSSEntry

    # Address the feed was fetched from; assigned from outside the parser.
    attr_accessor :feed_url

    # Reports whether +xml+ contains an opening <rss or <rdf tag.
    # Returns the match offset, or nil when neither tag is present.
    #
    # Both alternatives carry the leading "<". Written as /\<rss|rdf/ the
    # second branch matched the bare letters "rdf" anywhere in the document
    # (for instance inside a namespace declaration or in body text).
    def self.able_to_parse?(xml)
      xml =~ /\<rss|\<rdf/
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Feedzirra
  # SAXMachine mapping for a single <item> of an RSS document. Each
  # declaration maps an XML element to an attribute of the entry; the
  # declarations are kept in their original order.
  class RSSEntry
    include SAXMachine
    include FeedEntryUtilities

    element :title
    element :link,              :as => :url

    element :"dc:creator",      :as => :author
    element :"content:encoded", :as => :content
    element :description,       :as => :summary

    # Two different date elements feed the same attribute; both go through
    # FeedEntryUtilities#published=, which parses the date string.
    element :pubDate,           :as => :published
    element :"dc:date",         :as => :published
    elements :category,         :as => :categories
  end
end
|
data/lib/feedzirra.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__)) unless $LOAD_PATH.include?(File.dirname(__FILE__))
|
2
|
+
|
3
|
+
gem 'activesupport'
|
4
|
+
|
5
|
+
require 'zlib'
|
6
|
+
require 'curb'
|
7
|
+
require 'sax-machine'
|
8
|
+
require 'dryopteris'
|
9
|
+
require 'active_support/basic_object'
|
10
|
+
require 'active_support/core_ext/object'
|
11
|
+
require 'active_support/core_ext/time'
|
12
|
+
|
13
|
+
require 'core_ext/date'
|
14
|
+
|
15
|
+
require 'feedzirra/feed_utilities'
|
16
|
+
require 'feedzirra/feed_entry_utilities'
|
17
|
+
require 'feedzirra/feed'
|
18
|
+
|
19
|
+
require 'feedzirra/rss_entry'
|
20
|
+
require 'feedzirra/rdf_entry'
|
21
|
+
require 'feedzirra/itunes_rss_owner'
|
22
|
+
require 'feedzirra/itunes_rss_item'
|
23
|
+
require 'feedzirra/atom_entry'
|
24
|
+
require 'feedzirra/atom_feed_burner_entry'
|
25
|
+
|
26
|
+
require 'feedzirra/rss'
|
27
|
+
require 'feedzirra/rdf'
|
28
|
+
require 'feedzirra/itunes_rss'
|
29
|
+
require 'feedzirra/atom'
|
30
|
+
require 'feedzirra/atom_feed_burner'
|
31
|
+
|
32
|
+
module Feedzirra
  # Library version string. Frozen so that it cannot be mutated in place by
  # accident.
  VERSION = "0.0.3".freeze
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
describe Feedzirra::AtomEntry do
  # Entry XML is never handed to AtomEntry directly in real use, so the entry
  # under test is taken from a feed parsed by Feedzirra::Atom, even though
  # that makes these examples depend on more than AtomEntry alone.
  before(:each) do
    parsed_feed = Feedzirra::Atom.parse(sample_atom_feed)
    @entry = parsed_feed.entries.first
  end

  it "should parse the title" do
    expected_title = "AWS Job: Architect & Designer Position in Turkey"
    @entry.title.should == expected_title
  end

  it "should parse the url" do
    expected_url = "http://aws.typepad.com/aws/2009/01/aws-job-architect-designer-position-in-turkey.html"
    @entry.url.should == expected_url
  end

  it "should parse the author" do
    @entry.author.should == "AWS Editor"
  end

  it "should parse the content" do
    @entry.content.should == sample_atom_entry_content
  end

  it "should provide a summary" do
    expected_summary = "Late last year an entrepreneur from Turkey visited me at Amazon HQ in Seattle. We talked about his plans to use AWS as part of his new social video portal startup. I won't spill any beans before he's ready to..."
    @entry.summary.should == expected_summary
  end

  it "should parse the published date" do
    published_text = @entry.published.to_s
    published_text.should == "Fri Jan 16 18:21:00 UTC 2009"
  end

  it "should parse the categories" do
    @entry.categories.should == ['Turkey', 'Seattle']
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
describe Feedzirra::AtomFeedBurnerEntry do
  # Entry XML is never handed to the entry class directly in real use, so the
  # entry under test is taken from a feed parsed by Feedzirra::AtomFeedBurner,
  # even though that makes these examples depend on more than the entry class.
  before(:each) do
    parsed_feed = Feedzirra::AtomFeedBurner.parse(sample_feedburner_atom_feed)
    @entry = parsed_feed.entries.first
  end

  it "should parse the title" do
    @entry.title.should == "Making a Ruby C library even faster"
  end

  it "should be able to fetch a url via the 'alternate' rel if no origLink exists" do
    fixture_xml = File.read("#{File.dirname(__FILE__)}/../sample_feeds/PaulDixExplainsNothingAlternate.xml")
    alternate_entry = Feedzirra::AtomFeedBurner.parse(fixture_xml).entries.first
    alternate_entry.url.should == 'http://feeds.feedburner.com/~r/PaulDixExplainsNothing/~3/519925023/making-a-ruby-c-library-even-faster.html'
  end

  it "should parse the url" do
    expected_url = "http://www.pauldix.net/2009/01/making-a-ruby-c-library-even-faster.html"
    @entry.url.should == expected_url
  end

  it "should parse the author" do
    @entry.author.should == "Paul Dix"
  end

  it "should parse the content" do
    @entry.content.should == sample_feedburner_atom_entry_content
  end

  it "should provide a summary" do
    expected_summary = "Last week I released the first version of a SAX based XML parsing library called SAX-Machine. It uses Nokogiri, which uses libxml, so it's pretty fast. However, I felt that it could be even faster. The only question was how..."
    @entry.summary.should == expected_summary
  end

  it "should parse the published date" do
    published_text = @entry.published.to_s
    published_text.should == "Thu Jan 22 15:50:22 UTC 2009"
  end

  it "should parse the categories" do
    @entry.categories.should == ['Ruby', 'Another Category']
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
describe Feedzirra::AtomFeedBurner do
  # Format detection. The group is named after the class method the examples
  # actually exercise: be_able_to_parse calls able_to_parse?.
  describe ".able_to_parse?" do
    it "should return true for a feedburner atom feed" do
      Feedzirra::AtomFeedBurner.should be_able_to_parse(sample_feedburner_atom_feed)
    end

    it "should return false for an rdf feed" do
      Feedzirra::AtomFeedBurner.should_not be_able_to_parse(sample_rdf_feed)
    end

    it "should return false for a regular atom feed" do
      Feedzirra::AtomFeedBurner.should_not be_able_to_parse(sample_atom_feed)
    end
  end

  # Feed-level attributes extracted from the FeedBurner sample feed.
  describe "parsing" do
    before(:each) do
      @feed = Feedzirra::AtomFeedBurner.parse(sample_feedburner_atom_feed)
    end

    it "should parse the title" do
      @feed.title.should == "Paul Dix Explains Nothing"
    end

    it "should parse the url" do
      @feed.url.should == "http://www.pauldix.net/"
    end

    it "should parse the feed_url" do
      @feed.feed_url.should == "http://feeds.feedburner.com/PaulDixExplainsNothing"
    end

    it "should parse entries" do
      @feed.entries.size.should == 5
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
describe Feedzirra::Atom do
  # Format detection. The group is named after the class method the examples
  # actually exercise: be_able_to_parse calls able_to_parse?.
  describe ".able_to_parse?" do
    it "should return true for an atom feed" do
      Feedzirra::Atom.should be_able_to_parse(sample_atom_feed)
    end

    it "should return false for an rdf feed" do
      Feedzirra::Atom.should_not be_able_to_parse(sample_rdf_feed)
    end
  end

  # Feed-level attributes extracted from the Atom sample feed.
  describe "parsing" do
    before(:each) do
      @feed = Feedzirra::Atom.parse(sample_atom_feed)
    end

    it "should parse the title" do
      @feed.title.should == "Amazon Web Services Blog"
    end

    it "should parse the url" do
      @feed.url.should == "http://aws.typepad.com/aws/"
    end

    it "should parse the feed_url" do
      @feed.feed_url.should == "http://aws.typepad.com/aws/atom.xml"
    end

    it "should parse entries" do
      @feed.entries.size.should == 10
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
# These examples exercise Feedzirra::FeedEntryUtilities (the module mixed into
# @klass and into parsed entries), so the group is described by that module
# rather than by FeedUtilities.
describe Feedzirra::FeedEntryUtilities do
  before(:each) do
    # Anonymous host class that only mixes in the module under test.
    @klass = Class.new do
      include Feedzirra::FeedEntryUtilities
    end
  end

  describe "handling dates" do
    it "should parse an ISO 8601 formatted datetime into Time" do
      time = @klass.new.parse_datetime("2008-02-20T8:05:00-010:00")
      time.class.should == Time
      time.to_s.should == "Wed Feb 20 18:05:00 UTC 2008"
    end
  end

  describe "sanitizing" do
    before(:each) do
      @feed = Feedzirra::Feed.parse(sample_atom_feed)
      @entry = @feed.entries.first
    end

    it "should provide a sanitized title" do
      new_title = "<script>" + @entry.title
      @entry.title = new_title
      @entry.title.sanitize.should == Dryopteris.sanitize(new_title)
    end

    it "should sanitize content in place" do
      new_content = "<script>" + @entry.content
      # The entry gets its own copy so new_content stays unsanitized for the
      # comparisons below.
      @entry.content = new_content.dup
      @entry.content.sanitize!.should == Dryopteris.sanitize(new_content)
      @entry.content.should == Dryopteris.sanitize(new_content)
    end

    it "should sanitize things in place" do
      @entry.title += "<script>"
      @entry.author += "<script>"
      @entry.content += "<script>"

      # Expected values are computed before the entry sanitizes itself.
      cleaned_title = Dryopteris.sanitize(@entry.title)
      cleaned_author = Dryopteris.sanitize(@entry.author)
      cleaned_content = Dryopteris.sanitize(@entry.content)

      @entry.sanitize!
      @entry.title.should == cleaned_title
      @entry.author.should == cleaned_author
      @entry.content.should == cleaned_content
    end
  end
end
|
@@ -0,0 +1,270 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
describe Feedzirra::Feed do
|
4
|
+
# Several of these examples overlap with the specs of the individual feed
# types; they are kept so that Feed.parse itself is covered for every format.
describe "#parse" do
  context "when there's an available parser" do
    it "should parse an rdf feed" do
      parsed = Feedzirra::Feed.parse(sample_rdf_feed)
      parsed.title.should == "HREF Considered Harmful"
      parsed.entries.first.published.to_s.should == "Tue Sep 02 19:50:07 UTC 2008"
      parsed.entries.size.should == 10
    end

    it "should parse an rss feed" do
      parsed = Feedzirra::Feed.parse(sample_rss_feed)
      parsed.title.should == "Tender Lovemaking"
      parsed.entries.first.published.to_s.should == "Thu Dec 04 17:17:49 UTC 2008"
      parsed.entries.size.should == 10
    end

    it "should parse an atom feed" do
      parsed = Feedzirra::Feed.parse(sample_atom_feed)
      parsed.title.should == "Amazon Web Services Blog"
      parsed.entries.first.published.to_s.should == "Fri Jan 16 18:21:00 UTC 2009"
      parsed.entries.size.should == 10
    end

    it "should parse an feedburner atom feed" do
      parsed = Feedzirra::Feed.parse(sample_feedburner_atom_feed)
      parsed.title.should == "Paul Dix Explains Nothing"
      parsed.entries.first.published.to_s.should == "Thu Jan 22 15:50:22 UTC 2009"
      parsed.entries.size.should == 5
    end

    it "should parse an itunes feed" do
      parsed = Feedzirra::Feed.parse(sample_itunes_feed)
      newest = parsed.entries.first
      parsed.title.should == "All About Everything"
      newest.published.to_s.should == "Wed, 15 Jun 2005 19:00:00 GMT"
      newest.itunes_author.should == "John Doe"
      parsed.entries.size.should == 3
    end
  end

  context "when there's no available parser" do
    it "raises Feedzirra::NoParserAvailable" do
      parse_invalid = lambda { Feedzirra::Feed.parse("I'm an invalid feed") }
      parse_invalid.should raise_error(Feedzirra::NoParserAvailable)
    end
  end

  it "should parse an feedburner rss feed" do
    parsed = Feedzirra::Feed.parse(sample_rss_feed_burner_feed)
    parsed.title.should == "Sam Harris: Author, Philosopher, Essayist, Atheist"
    parsed.entries.first.published.to_s.should == "Tue Jan 13 17:20:28 UTC 2009"
    parsed.entries.size.should == 10
  end
end
|
59
|
+
|
60
|
+
# Each example feeds one sample document to the parser lookup and checks
# which parser class is selected for it.
describe "#determine_feed_parser_for_xml" do
  it "should return the Feedzirra::Atom class for an atom feed" do
    parser = Feedzirra::Feed.determine_feed_parser_for_xml(sample_atom_feed)
    parser.should == Feedzirra::Atom
  end

  it "should return the Feedzirra::AtomFeedBurner class for an atom feedburner feed" do
    parser = Feedzirra::Feed.determine_feed_parser_for_xml(sample_feedburner_atom_feed)
    parser.should == Feedzirra::AtomFeedBurner
  end

  it "should return the Feedzirra::RSS class for an rdf/rss 1.0 feed" do
    parser = Feedzirra::Feed.determine_feed_parser_for_xml(sample_rdf_feed)
    parser.should == Feedzirra::RSS
  end

  it "should return the Feedzirra::RSS class for an rss feedburner feed" do
    parser = Feedzirra::Feed.determine_feed_parser_for_xml(sample_rss_feed_burner_feed)
    parser.should == Feedzirra::RSS
  end

  it "should return the Feedzirra::RSS object for an rss 2.0 feed" do
    parser = Feedzirra::Feed.determine_feed_parser_for_xml(sample_rss_feed)
    parser.should == Feedzirra::RSS
  end

  it "should return the Feedzirra::ITunesRSS object for an itunes feed" do
    parser = Feedzirra::Feed.determine_feed_parser_for_xml(sample_itunes_feed)
    parser.should == Feedzirra::ITunesRSS
  end
end
|
86
|
+
|
87
|
+
describe "adding feed types" do
  it "should prioritize added feed types over the built in ones" do
    feed_text = "Atom asdf"
    Feedzirra::Atom.should be_able_to_parse(feed_text)

    # A parser class that claims to understand every document.
    new_feed_type = Class.new do
      def self.able_to_parse?(val)
        true
      end
    end
    new_feed_type.should be_able_to_parse(feed_text)

    begin
      Feedzirra::Feed.add_feed_class(new_feed_type)
      Feedzirra::Feed.determine_feed_parser_for_xml(feed_text).should == new_feed_type
    ensure
      # Unregister the catch-all class even when the expectation above fails;
      # otherwise it would stay registered and break the remaining examples.
      Feedzirra::Feed.feed_classes.reject! { |o| o == new_feed_type }
    end
  end
end
|
104
|
+
|
105
|
+
# Extraction of the ETag and Last-Modified values from a raw HTTP response
# header string.
describe "header parsing" do
  before(:each) do
    @header = "HTTP/1.0 200 OK\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\nETag: ziEyTl4q9GH04BR4jgkImd0GvSE\r\nP3P: CP=\"ALL DSP COR NID CUR OUR NOR\"\r\nConnection: close\r\nContent-Type: text/xml;charset=utf-8\r\n\r\n"
  end

  it "should parse out an etag" do
    etag = Feedzirra::Feed.etag_from_header(@header)
    etag.should == "ziEyTl4q9GH04BR4jgkImd0GvSE"
  end

  it "should return nil if there is no etag in header" do
    etag = Feedzirra::Feed.etag_from_header("foo")
    etag.should be_nil
  end

  it "should parse out a last-modified date" do
    expected_time = Time.parse("Wed, 28 Jan 2009 04:10:32 GMT")
    Feedzirra::Feed.last_modified_from_header(@header).should == expected_time
  end

  it "should return nil if there is no last-modified in header" do
    last_modified = Feedzirra::Feed.last_modified_from_header("foo")
    last_modified.should be_nil
  end
end
|
126
|
+
|
127
|
+
# These examples fetch the feed addresses below over the network. Examples
# declared without a block are pending.
describe "fetching feeds" do
  before(:each) do
    @paul_feed_url = "http://feeds.feedburner.com/PaulDixExplainsNothing"
    @trotter_feed_url = "http://feeds2.feedburner.com/trottercashion"
  end

  describe "handling many feeds" do
    it "should break a large number into more manageable blocks of 40"
    it "should add to the queue as feeds finish (instead of waiting for each block of 40 to finsih)"
  end

  describe "#fetch_raw" do
    it "should take :user_agent as an option"
    it "should take :if_modified_since as an option"
    it "should take :if_none_match as an option"
    it "should take an optional on_success lambda"
    it "should take an optional on_failure lambda"

    it "should return raw xml" do
      raw_xml = Feedzirra::Feed.fetch_raw(@paul_feed_url)
      raw_xml.should =~ /^#{Regexp.escape('<?xml version="1.0" encoding="UTF-8"?>')}/
    end

    it "should take multiple feed urls and return a hash of urls and response xml" do
      raw_by_url = Feedzirra::Feed.fetch_raw([@paul_feed_url, @trotter_feed_url])
      fetched_urls = raw_by_url.keys
      fetched_urls.should include(@paul_feed_url)
      fetched_urls.should include(@trotter_feed_url)
      raw_by_url[@paul_feed_url].should =~ /Paul Dix/
      raw_by_url[@trotter_feed_url].should =~ /Trotter Cashion/
    end

    it "should always return a hash when passed an array" do
      raw_by_url = Feedzirra::Feed.fetch_raw([@paul_feed_url])
      raw_by_url.class.should == Hash
    end
  end

  describe "#fetch_and_parse" do
    it "should return a feed object for a single url" do
      parsed = Feedzirra::Feed.fetch_and_parse(@paul_feed_url)
      parsed.title.should == "Paul Dix Explains Nothing"
    end

    it "should set the feed_url to the new url if redirected" do
      parsed = Feedzirra::Feed.fetch_and_parse("http://tinyurl.com/tenderlovemaking")
      parsed.feed_url.should == "http://tenderlovemaking.com/feed/"
    end

    it "should set the feed_url for an rdf feed" do
      parsed = Feedzirra::Feed.fetch_and_parse("http://www.avibryant.com/rss.xml")
      parsed.feed_url.should == "http://www.avibryant.com/rss.xml"
    end

    it "should set the feed_url for an rss feed" do
      parsed = Feedzirra::Feed.fetch_and_parse("http://tenderlovemaking.com/feed/")
      parsed.feed_url.should == "http://tenderlovemaking.com/feed/"
    end

    it "should set the feed_url for an itunes feed" do
      parsed = Feedzirra::Feed.fetch_and_parse("http://www.stanford.edu/group/edcorner/uploads/podcast/EducatorsCorner.xml")
      parsed.feed_url.should == "http://www.stanford.edu/group/edcorner/uploads/podcast/EducatorsCorner.xml"
    end

    it "should return a hash of feed objects with the passed in feed_url for the key and parsed feed for the value for multiple feeds" do
      feeds_by_url = Feedzirra::Feed.fetch_and_parse([@paul_feed_url, @trotter_feed_url])
      feeds_by_url.size.should == 2
      feeds_by_url[@paul_feed_url].feed_url.should == @paul_feed_url
      feeds_by_url[@trotter_feed_url].feed_url.should == @trotter_feed_url
    end

    it "should always return a hash when passed an array" do
      feeds_by_url = Feedzirra::Feed.fetch_and_parse([@paul_feed_url])
      feeds_by_url.class.should == Hash
    end

    it "should yeild the url and feed object to a :on_success lambda" do
      # The mock proves that the callback really ran.
      successful_call_mock = mock("successful_call_mock")
      successful_call_mock.should_receive(:call)

      success_handler = lambda do |feed_url, feed|
        feed_url.should == @paul_feed_url
        feed.class.should == Feedzirra::AtomFeedBurner
        successful_call_mock.call
      end
      Feedzirra::Feed.fetch_and_parse(@paul_feed_url, :on_success => success_handler)
    end

    it "should yield the url, response_code, response_header, and response_body to a :on_failure lambda" do
      # The mock proves that the callback really ran.
      failure_call_mock = mock("failure_call_mock")
      failure_call_mock.should_receive(:call)
      fail_url = "http://localhost"

      failure_handler = lambda do |feed_url, response_code, response_header, response_body|
        feed_url.should == fail_url
        response_code.should == 0
        response_header.should == ""
        response_body.should == ""
        failure_call_mock.call
      end
      Feedzirra::Feed.fetch_and_parse(fail_url, :on_failure => failure_handler)
    end

    it "should return a not modified status for a feed with a :if_modified_since is past its last update" do
      status = Feedzirra::Feed.fetch_and_parse(@paul_feed_url, :if_modified_since => Time.now)
      status.should == 304
    end

    # Pending; the intended check is that the etag of the fetched feed is not
    # an empty string.
    it "should set the etag from the header"

    # Pending; the intended check is that last_modified of the fetched feed
    # is a Time.
    it "should set the last_modified from the header"
  end

  describe "#update" do
    it "should update and return a single feed object" do
      stale_feed = Feedzirra::Feed.fetch_and_parse(@paul_feed_url)
      # Drop the newest entry and the caching data so the next update has
      # something to find.
      stale_feed.entries.delete_at(0)
      stale_feed.last_modified = nil
      stale_feed.etag = nil

      updated_feed = Feedzirra::Feed.update(stale_feed)
      updated_feed.new_entries.size.should == 1
      updated_feed.should have_new_entries
    end

    it "should update a collection of feed objects" do
      feeds_by_url = Feedzirra::Feed.fetch_and_parse([@paul_feed_url, @trotter_feed_url])
      paul_entries_size = feeds_by_url[@paul_feed_url].entries.size
      trotter_entries_size = feeds_by_url[@trotter_feed_url].entries.size

      feeds_by_url.values.each do |stale_feed|
        stale_feed.last_modified = nil
        stale_feed.etag = nil
        stale_feed.entries.delete_at(0)
      end

      updated_feeds = Feedzirra::Feed.update(feeds_by_url.values)
      paul_feed = updated_feeds.detect { |f| f.feed_url == @paul_feed_url }
      trotter_feed = updated_feeds.detect { |f| f.feed_url == @trotter_feed_url }
      paul_feed.entries.size.should == paul_entries_size
      trotter_feed.entries.size.should == trotter_entries_size
    end

    it "should return the feed objects even when not updated" do
      feeds_by_url = Feedzirra::Feed.fetch_and_parse([@paul_feed_url, @trotter_feed_url])
      updated_feeds = Feedzirra::Feed.update(feeds_by_url.values)
      updated_feeds.size.should == 2
      updated_feeds.first.should_not be_updated
      updated_feeds.last.should_not be_updated
    end
  end
end
|
270
|
+
end
|