pauldix-feedzirra 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/feedzirra/atom_entry.rb +4 -0
- data/lib/feedzirra/atom_feed_burner_entry.rb +5 -0
- data/lib/feedzirra/feed.rb +16 -9
- data/lib/feedzirra/feed_entry_utilities.rb +27 -4
- data/lib/feedzirra/rss_entry.rb +8 -0
- data/lib/feedzirra.rb +1 -1
- data/spec/feedzirra/atom_entry_spec.rb +5 -0
- data/spec/feedzirra/feed_spec.rb +1 -1
- metadata +1 -1
- data/lib/feedzirra/rdf.rb +0 -23
- data/lib/feedzirra/rdf_entry.rb +0 -22
- data/spec/feedzirra/rdf_entry_spec.rb +0 -33
- data/spec/feedzirra/rdf_spec.rb +0 -37
data/lib/feedzirra/atom_entry.rb
CHANGED
@@ -19,7 +19,11 @@ module Feedzirra
|
|
19
19
|
element :content
|
20
20
|
element :summary
|
21
21
|
element :published
|
22
|
+
element :id
|
22
23
|
element :created, :as => :published
|
24
|
+
element :issued, :as => :published
|
25
|
+
element :updated
|
26
|
+
element :modified, :as => :updated
|
23
27
|
elements :category, :as => :categories, :value => :term
|
24
28
|
end
|
25
29
|
end
|
@@ -20,6 +20,11 @@ module Feedzirra
|
|
20
20
|
element :summary
|
21
21
|
element :content
|
22
22
|
element :published
|
23
|
+
element :id
|
24
|
+
element :issued, :as => :published
|
25
|
+
element :created, :as => :published
|
26
|
+
element :updated
|
27
|
+
element :modified, :as => :updated
|
23
28
|
elements :category, :as => :categories, :value => :term
|
24
29
|
end
|
25
30
|
end
|
data/lib/feedzirra/feed.rb
CHANGED
@@ -153,15 +153,20 @@ module Feedzirra
|
|
153
153
|
# [curl_request<Curl::Easy>] The Curl::Easy response object from the request.
|
154
154
|
# === Returns
|
155
155
|
# A decoded string of XML.
|
156
|
-
def self.decode_content(
|
157
|
-
if
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
156
|
+
def self.decode_content(c)
|
157
|
+
if c.header_str.match(/Content-Encoding: gzip/)
|
158
|
+
begin
|
159
|
+
gz = Zlib::GzipReader.new(StringIO.new(c.body_str))
|
160
|
+
xml = gz.read
|
161
|
+
gz.close
|
162
|
+
rescue Zlib::GzipFile::Error
|
163
|
+
# Maybe this is not gzipped?
|
164
|
+
xml = c.body_str
|
165
|
+
end
|
166
|
+
elsif c.header_str.match(/Content-Encoding: deflate/)
|
167
|
+
xml = Zlib::Inflate.inflate(c.body_str)
|
163
168
|
else
|
164
|
-
xml =
|
169
|
+
xml = c.body_str
|
165
170
|
end
|
166
171
|
|
167
172
|
xml
|
@@ -228,7 +233,9 @@ module Feedzirra
|
|
228
233
|
responses[url] = feed
|
229
234
|
options[:on_success].call(url, feed) if options.has_key?(:on_success)
|
230
235
|
else
|
231
|
-
|
236
|
+
# puts "Error determining parser for #{url} - #{c.last_effective_url}"
|
237
|
+
# raise NoParserAvailable.new("no valid parser for content.") (this would unfortunately fail the whole 'multi', so it's not really usable)
|
238
|
+
options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
|
232
239
|
end
|
233
240
|
end
|
234
241
|
|
@@ -1,13 +1,36 @@
|
|
1
1
|
module Feedzirra
|
2
|
-
module FeedEntryUtilities
|
3
|
-
|
2
|
+
module FeedEntryUtilities
|
3
|
+
def published
|
4
|
+
@published || @updated
|
5
|
+
end
|
4
6
|
|
5
7
|
def parse_datetime(string)
|
6
|
-
|
8
|
+
begin
|
9
|
+
DateTime.parse(string).feed_utils_to_gm_time
|
10
|
+
rescue
|
11
|
+
puts "DATE CAN'T BE PARSED: #{string}"
|
12
|
+
nil
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
##
|
17
|
+
# Returns the id of the entry, or its url if no id is present, as some formats don't support it
|
18
|
+
def id
|
19
|
+
@id || @url
|
7
20
|
end
|
8
21
|
|
22
|
+
##
|
23
|
+
# Writer for published. By default, we keep the "oldest" publish time found.
|
9
24
|
def published=(val)
|
10
|
-
|
25
|
+
parsed = parse_datetime(val)
|
26
|
+
@published = parsed if !@published || parsed < @published
|
27
|
+
end
|
28
|
+
|
29
|
+
##
|
30
|
+
# Writer for updated. By default, we keep the most recent update time found.
|
31
|
+
def updated=(val)
|
32
|
+
parsed = parse_datetime(val)
|
33
|
+
@updated = parsed if !@updated || parsed > @updated
|
11
34
|
end
|
12
35
|
|
13
36
|
def sanitize!
|
data/lib/feedzirra/rss_entry.rb
CHANGED
@@ -22,6 +22,14 @@ module Feedzirra
|
|
22
22
|
|
23
23
|
element :pubDate, :as => :published
|
24
24
|
element :"dc:date", :as => :published
|
25
|
+
element :"dc:Date", :as => :published
|
26
|
+
element :"dcterms:created", :as => :published
|
27
|
+
|
28
|
+
|
29
|
+
element :"dcterms:modified", :as => :updated
|
30
|
+
element :issued, :as => :published
|
25
31
|
elements :category, :as => :categories
|
32
|
+
|
33
|
+
element :guid, :as => :id
|
26
34
|
end
|
27
35
|
end
|
data/lib/feedzirra.rb
CHANGED
data/spec/feedzirra/feed_spec.rb
CHANGED
@@ -523,7 +523,7 @@ describe Feedzirra::Feed do
|
|
523
523
|
|
524
524
|
it 'should deflate the response body using inflate if the Content-Encoding: is deflate' do
|
525
525
|
@curl_easy.stub!(:header_str).and_return('Content-Encoding: deflate')
|
526
|
-
Zlib::
|
526
|
+
Zlib::Inflate.should_receive(:inflate).with(@curl_easy.body_str)
|
527
527
|
Feedzirra::Feed.decode_content(@curl_easy)
|
528
528
|
end
|
529
529
|
|
metadata
CHANGED
data/lib/feedzirra/rdf.rb
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
module Feedzirra
|
2
|
-
# == Summary
|
3
|
-
# Parser for dealing with RDF feeds.
|
4
|
-
#
|
5
|
-
# == Attributes
|
6
|
-
# * title
|
7
|
-
# * feed_url
|
8
|
-
# * url
|
9
|
-
# * entries
|
10
|
-
class RDF
|
11
|
-
include SAXMachine
|
12
|
-
include FeedUtilities
|
13
|
-
element :title
|
14
|
-
element :link, :as => :url
|
15
|
-
elements :item, :as => :entries, :class => RDFEntry
|
16
|
-
|
17
|
-
attr_accessor :feed_url
|
18
|
-
|
19
|
-
def self.able_to_parse?(xml) #:nodoc:
|
20
|
-
xml =~ /(rdf\:RDF)|(#{Regexp.escape("http://purl.org/rss/1.0")})|(rss version\=\"0\.9.?\")/ || false
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
data/lib/feedzirra/rdf_entry.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
module Feedzirra
|
2
|
-
# == Summary
|
3
|
-
# Parser for dealing with RDF feed entries.
|
4
|
-
#
|
5
|
-
# == Attributes
|
6
|
-
# * title
|
7
|
-
# * url
|
8
|
-
# * author
|
9
|
-
# * content
|
10
|
-
# * summary
|
11
|
-
# * published
|
12
|
-
class RDFEntry
|
13
|
-
include SAXMachine
|
14
|
-
include FeedEntryUtilities
|
15
|
-
element :title
|
16
|
-
element :link, :as => :url
|
17
|
-
element :"dc:creator", :as => :author
|
18
|
-
element :"content:encoded", :as => :content
|
19
|
-
element :description, :as => :summary
|
20
|
-
element :"dc:date", :as => :published
|
21
|
-
end
|
22
|
-
end
|
@@ -1,33 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
-
|
3
|
-
describe Feedzirra::RDFEntry do
|
4
|
-
before(:each) do
|
5
|
-
# I don't really like doing it this way because these unit tests should only rely on AtomEntry,
|
6
|
-
# but this is actually how it should work. You would never just pass entry xml straight to the AtomEntry.
|
7
|
-
@entry = Feedzirra::RDF.parse(sample_rdf_feed).entries.first
|
8
|
-
end
|
9
|
-
|
10
|
-
it "should parse the title" do
|
11
|
-
@entry.title.should == "Chrome, V8 and Strongtalk"
|
12
|
-
end
|
13
|
-
|
14
|
-
it "should parse the url" do
|
15
|
-
@entry.url.should == "http://www.avibryant.com/2008/09/chrome-v8-and-s.html"
|
16
|
-
end
|
17
|
-
|
18
|
-
it "should parse the author" do
|
19
|
-
@entry.author.should == "Avi"
|
20
|
-
end
|
21
|
-
|
22
|
-
it "should parse the content" do
|
23
|
-
@entry.content.should == sample_rdf_entry_content
|
24
|
-
end
|
25
|
-
|
26
|
-
it "should provide a summary" do
|
27
|
-
@entry.summary.should == "There's lots to like about Google's new web browser, Chrome, which was released today. When I read the awesome comic strip introduction yesterday, however, the thing that stood out most for me was in very small type: the name Lars..."
|
28
|
-
end
|
29
|
-
|
30
|
-
it "should parse the published date" do
|
31
|
-
@entry.published.to_s.should == "Tue Sep 02 19:50:07 UTC 2008"
|
32
|
-
end
|
33
|
-
end
|
data/spec/feedzirra/rdf_spec.rb
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
-
|
3
|
-
describe Feedzirra::RDF do
|
4
|
-
describe "#will_parse?" do
|
5
|
-
it "should return true for an rdf feed" do
|
6
|
-
Feedzirra::RDF.should be_able_to_parse(sample_rdf_feed)
|
7
|
-
end
|
8
|
-
|
9
|
-
it "should return false for an atom feed" do
|
10
|
-
Feedzirra::RDF.should_not be_able_to_parse(sample_atom_feed)
|
11
|
-
end
|
12
|
-
end
|
13
|
-
|
14
|
-
describe "parsing" do
|
15
|
-
before(:each) do
|
16
|
-
@feed = Feedzirra::RDF.parse(sample_rdf_feed)
|
17
|
-
end
|
18
|
-
|
19
|
-
it "should parse the title" do
|
20
|
-
@feed.title.should == "HREF Considered Harmful"
|
21
|
-
end
|
22
|
-
|
23
|
-
it "should parse the url" do
|
24
|
-
@feed.url.should == "http://www.avibryant.com/"
|
25
|
-
end
|
26
|
-
|
27
|
-
# rdf doesn't actually specify the feed_url. This should be set in the fetcher.
|
28
|
-
it "should provide an accessor for the feed_url" do
|
29
|
-
@feed.respond_to?(:feed_url).should == true
|
30
|
-
@feed.respond_to?(:feed_url=).should == true
|
31
|
-
end
|
32
|
-
|
33
|
-
it "should parse entries" do
|
34
|
-
@feed.entries.size.should == 10
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|