pauldix-feedzirra 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/feedzirra/atom_entry.rb +4 -0
- data/lib/feedzirra/atom_feed_burner_entry.rb +5 -0
- data/lib/feedzirra/feed.rb +16 -9
- data/lib/feedzirra/feed_entry_utilities.rb +27 -4
- data/lib/feedzirra/rss_entry.rb +8 -0
- data/lib/feedzirra.rb +1 -1
- data/spec/feedzirra/atom_entry_spec.rb +5 -0
- data/spec/feedzirra/feed_spec.rb +1 -1
- metadata +1 -1
- data/lib/feedzirra/rdf.rb +0 -23
- data/lib/feedzirra/rdf_entry.rb +0 -22
- data/spec/feedzirra/rdf_entry_spec.rb +0 -33
- data/spec/feedzirra/rdf_spec.rb +0 -37
data/lib/feedzirra/atom_entry.rb
CHANGED
@@ -19,7 +19,11 @@ module Feedzirra
|
|
19
19
|
element :content
|
20
20
|
element :summary
|
21
21
|
element :published
|
22
|
+
element :id
|
22
23
|
element :created, :as => :published
|
24
|
+
element :issued, :as => :published
|
25
|
+
element :updated
|
26
|
+
element :modified, :as => :updated
|
23
27
|
elements :category, :as => :categories, :value => :term
|
24
28
|
end
|
25
29
|
end
|
@@ -20,6 +20,11 @@ module Feedzirra
|
|
20
20
|
element :summary
|
21
21
|
element :content
|
22
22
|
element :published
|
23
|
+
element :id
|
24
|
+
element :issued, :as => :published
|
25
|
+
element :created, :as => :published
|
26
|
+
element :updated
|
27
|
+
element :modified, :as => :updated
|
23
28
|
elements :category, :as => :categories, :value => :term
|
24
29
|
end
|
25
30
|
end
|
data/lib/feedzirra/feed.rb
CHANGED
@@ -153,15 +153,20 @@ module Feedzirra
|
|
153
153
|
# [curl_request<Curl::Easy>] The Curl::Easy response object from the request.
|
154
154
|
# === Returns
|
155
155
|
# A decoded string of XML.
|
156
|
-
def self.decode_content(
|
157
|
-
if
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
156
|
+
def self.decode_content(c)
|
157
|
+
if c.header_str.match(/Content-Encoding: gzip/)
|
158
|
+
begin
|
159
|
+
gz = Zlib::GzipReader.new(StringIO.new(c.body_str))
|
160
|
+
xml = gz.read
|
161
|
+
gz.close
|
162
|
+
rescue Zlib::GzipFile::Error
|
163
|
+
# Maybe this is not gzipped?
|
164
|
+
xml = c.body_str
|
165
|
+
end
|
166
|
+
elsif c.header_str.match(/Content-Encoding: deflate/)
|
167
|
+
xml = Zlib::Inflate.inflate(c.body_str)
|
163
168
|
else
|
164
|
-
xml =
|
169
|
+
xml = c.body_str
|
165
170
|
end
|
166
171
|
|
167
172
|
xml
|
@@ -228,7 +233,9 @@ module Feedzirra
|
|
228
233
|
responses[url] = feed
|
229
234
|
options[:on_success].call(url, feed) if options.has_key?(:on_success)
|
230
235
|
else
|
231
|
-
|
236
|
+
# puts "Error determining parser for #{url} - #{c.last_effective_url}"
|
237
|
+
# raise NoParserAvailable.new("no valid parser for content.") (this would unfortunately fail the whole 'multi', so it's not really usable)
|
238
|
+
options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
|
232
239
|
end
|
233
240
|
end
|
234
241
|
|
@@ -1,13 +1,36 @@
|
|
1
1
|
module Feedzirra
|
2
|
-
module FeedEntryUtilities
|
3
|
-
|
2
|
+
module FeedEntryUtilities
|
3
|
+
def published
|
4
|
+
@published || @updated
|
5
|
+
end
|
4
6
|
|
5
7
|
def parse_datetime(string)
|
6
|
-
|
8
|
+
begin
|
9
|
+
DateTime.parse(string).feed_utils_to_gm_time
|
10
|
+
rescue
|
11
|
+
puts "DATE CAN'T BE PARSED: #{string}"
|
12
|
+
nil
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
##
|
17
|
+
# Returns the id of the entry, or its url if no id is present, as some formats don't support it
|
18
|
+
def id
|
19
|
+
@id || @url
|
7
20
|
end
|
8
21
|
|
22
|
+
##
|
23
|
+
# Writer for published. By default, we keep the "oldest" publish time found.
|
9
24
|
def published=(val)
|
10
|
-
|
25
|
+
parsed = parse_datetime(val)
|
26
|
+
@published = parsed if !@published || parsed < @published
|
27
|
+
end
|
28
|
+
|
29
|
+
##
|
30
|
+
# Writer for updated. By default, we keep the most recent update time found.
|
31
|
+
def updated=(val)
|
32
|
+
parsed = parse_datetime(val)
|
33
|
+
@updated = parsed if !@updated || parsed > @updated
|
11
34
|
end
|
12
35
|
|
13
36
|
def sanitize!
|
data/lib/feedzirra/rss_entry.rb
CHANGED
@@ -22,6 +22,14 @@ module Feedzirra
|
|
22
22
|
|
23
23
|
element :pubDate, :as => :published
|
24
24
|
element :"dc:date", :as => :published
|
25
|
+
element :"dc:Date", :as => :published
|
26
|
+
element :"dcterms:created", :as => :published
|
27
|
+
|
28
|
+
|
29
|
+
element :"dcterms:modified", :as => :updated
|
30
|
+
element :issued, :as => :published
|
25
31
|
elements :category, :as => :categories
|
32
|
+
|
33
|
+
element :guid, :as => :id
|
26
34
|
end
|
27
35
|
end
|
data/lib/feedzirra.rb
CHANGED
data/spec/feedzirra/feed_spec.rb
CHANGED
@@ -523,7 +523,7 @@ describe Feedzirra::Feed do
|
|
523
523
|
|
524
524
|
it 'should deflate the response body using inflate if the Content-Encoding: is deflate' do
|
525
525
|
@curl_easy.stub!(:header_str).and_return('Content-Encoding: deflate')
|
526
|
-
Zlib::
|
526
|
+
Zlib::Inflate.should_receive(:inflate).with(@curl_easy.body_str)
|
527
527
|
Feedzirra::Feed.decode_content(@curl_easy)
|
528
528
|
end
|
529
529
|
|
metadata
CHANGED
data/lib/feedzirra/rdf.rb
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
module Feedzirra
|
2
|
-
# == Summary
|
3
|
-
# Parser for dealing with RDF feeds.
|
4
|
-
#
|
5
|
-
# == Attributes
|
6
|
-
# * title
|
7
|
-
# * feed_url
|
8
|
-
# * url
|
9
|
-
# * entries
|
10
|
-
class RDF
|
11
|
-
include SAXMachine
|
12
|
-
include FeedUtilities
|
13
|
-
element :title
|
14
|
-
element :link, :as => :url
|
15
|
-
elements :item, :as => :entries, :class => RDFEntry
|
16
|
-
|
17
|
-
attr_accessor :feed_url
|
18
|
-
|
19
|
-
def self.able_to_parse?(xml) #:nodoc:
|
20
|
-
xml =~ /(rdf\:RDF)|(#{Regexp.escape("http://purl.org/rss/1.0")})|(rss version\=\"0\.9.?\")/ || false
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
data/lib/feedzirra/rdf_entry.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
module Feedzirra
|
2
|
-
# == Summary
|
3
|
-
# Parser for dealing with RDF feed entries.
|
4
|
-
#
|
5
|
-
# == Attributes
|
6
|
-
# * title
|
7
|
-
# * url
|
8
|
-
# * author
|
9
|
-
# * content
|
10
|
-
# * summary
|
11
|
-
# * published
|
12
|
-
class RDFEntry
|
13
|
-
include SAXMachine
|
14
|
-
include FeedEntryUtilities
|
15
|
-
element :title
|
16
|
-
element :link, :as => :url
|
17
|
-
element :"dc:creator", :as => :author
|
18
|
-
element :"content:encoded", :as => :content
|
19
|
-
element :description, :as => :summary
|
20
|
-
element :"dc:date", :as => :published
|
21
|
-
end
|
22
|
-
end
|
@@ -1,33 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
-
|
3
|
-
describe Feedzirra::RDFEntry do
|
4
|
-
before(:each) do
|
5
|
-
# I don't really like doing it this way because these unit tests should only rely on AtomEntry,
|
6
|
-
# but this is actually how it should work. You would never just pass entry xml straight to the AtomEntry
|
7
|
-
@entry = Feedzirra::RDF.parse(sample_rdf_feed).entries.first
|
8
|
-
end
|
9
|
-
|
10
|
-
it "should parse the title" do
|
11
|
-
@entry.title.should == "Chrome, V8 and Strongtalk"
|
12
|
-
end
|
13
|
-
|
14
|
-
it "should parse the url" do
|
15
|
-
@entry.url.should == "http://www.avibryant.com/2008/09/chrome-v8-and-s.html"
|
16
|
-
end
|
17
|
-
|
18
|
-
it "should parse the author" do
|
19
|
-
@entry.author.should == "Avi"
|
20
|
-
end
|
21
|
-
|
22
|
-
it "should parse the content" do
|
23
|
-
@entry.content.should == sample_rdf_entry_content
|
24
|
-
end
|
25
|
-
|
26
|
-
it "should provide a summary" do
|
27
|
-
@entry.summary.should == "There's lots to like about Google's new web browser, Chrome, which was released today. When I read the awesome comic strip introduction yesterday, however, the thing that stood out most for me was in very small type: the name Lars..."
|
28
|
-
end
|
29
|
-
|
30
|
-
it "should parse the published date" do
|
31
|
-
@entry.published.to_s.should == "Tue Sep 02 19:50:07 UTC 2008"
|
32
|
-
end
|
33
|
-
end
|
data/spec/feedzirra/rdf_spec.rb
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
-
|
3
|
-
describe Feedzirra::RDF do
|
4
|
-
describe "#will_parse?" do
|
5
|
-
it "should return true for an rdf feed" do
|
6
|
-
Feedzirra::RDF.should be_able_to_parse(sample_rdf_feed)
|
7
|
-
end
|
8
|
-
|
9
|
-
it "should return false for an atom feed" do
|
10
|
-
Feedzirra::RDF.should_not be_able_to_parse(sample_atom_feed)
|
11
|
-
end
|
12
|
-
end
|
13
|
-
|
14
|
-
describe "parsing" do
|
15
|
-
before(:each) do
|
16
|
-
@feed = Feedzirra::RDF.parse(sample_rdf_feed)
|
17
|
-
end
|
18
|
-
|
19
|
-
it "should parse the title" do
|
20
|
-
@feed.title.should == "HREF Considered Harmful"
|
21
|
-
end
|
22
|
-
|
23
|
-
it "should parse the url" do
|
24
|
-
@feed.url.should == "http://www.avibryant.com/"
|
25
|
-
end
|
26
|
-
|
27
|
-
# rdf doesn't actually specify the feed_url. This should be set in the fetcher.
|
28
|
-
it "should provide an accessor for the feed_url" do
|
29
|
-
@feed.respond_to?(:feed_url).should == true
|
30
|
-
@feed.respond_to?(:feed_url=).should == true
|
31
|
-
end
|
32
|
-
|
33
|
-
it "should parse entries" do
|
34
|
-
@feed.entries.size.should == 10
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|