pauldix-feedzirra 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -19,7 +19,11 @@ module Feedzirra
19
19
  element :content
20
20
  element :summary
21
21
  element :published
22
+ element :id
22
23
  element :created, :as => :published
24
+ element :issued, :as => :published
25
+ element :updated
26
+ element :modified, :as => :updated
23
27
  elements :category, :as => :categories, :value => :term
24
28
  end
25
29
  end
@@ -20,6 +20,11 @@ module Feedzirra
20
20
  element :summary
21
21
  element :content
22
22
  element :published
23
+ element :id
24
+ element :issued, :as => :published
25
+ element :created, :as => :published
26
+ element :updated
27
+ element :modified, :as => :updated
23
28
  elements :category, :as => :categories, :value => :term
24
29
  end
25
30
  end
@@ -153,15 +153,20 @@ module Feedzirra
153
153
  # [curl_request<Curl::Easy>] The Curl::Easy response object from the request.
154
154
  # === Returns
155
155
  # A decoded string of XML.
156
- def self.decode_content(curl_request)
157
- if curl_request.header_str.match(/Content-Encoding: gzip/)
158
- gz = Zlib::GzipReader.new(StringIO.new(curl_request.body_str))
159
- xml = gz.read
160
- gz.close
161
- elsif curl_request.header_str.match(/Content-Encoding: deflate/)
162
- xml = Zlib::Deflate.inflate(curl_request.body_str)
156
+ def self.decode_content(c)
157
+ if c.header_str.match(/Content-Encoding: gzip/)
158
+ begin
159
+ gz = Zlib::GzipReader.new(StringIO.new(c.body_str))
160
+ xml = gz.read
161
+ gz.close
162
+ rescue Zlib::GzipFile::Error
163
+ # Maybe this is not gzipped?
164
+ xml = c.body_str
165
+ end
166
+ elsif c.header_str.match(/Content-Encoding: deflate/)
167
+ xml = Zlib::Inflate.inflate(c.body_str)
163
168
  else
164
- xml = curl_request.body_str
169
+ xml = c.body_str
165
170
  end
166
171
 
167
172
  xml
@@ -228,7 +233,9 @@ module Feedzirra
228
233
  responses[url] = feed
229
234
  options[:on_success].call(url, feed) if options.has_key?(:on_success)
230
235
  else
231
- raise NoParserAvailable.new("Error determining parser for #{url} - #{c.last_effective_url}.")
236
+ # puts "Error determining parser for #{url} - #{c.last_effective_url}"
237
+ # raise NoParserAvailable.new("no valid parser for content.") (this would unfortunately fail the whole 'multi', so it's not really usable)
238
+ options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
232
239
  end
233
240
  end
234
241
 
@@ -1,13 +1,36 @@
1
1
  module Feedzirra
2
- module FeedEntryUtilities
3
- attr_reader :published
2
+ module FeedEntryUtilities
3
+ def published
4
+ @published || @updated
5
+ end
4
6
 
5
7
  def parse_datetime(string)
6
- DateTime.parse(string).feed_utils_to_gm_time
8
+ begin
9
+ DateTime.parse(string).feed_utils_to_gm_time
10
+ rescue
11
+ puts "DATE CAN'T BE PARSED: #{string}"
12
+ nil
13
+ end
14
+ end
15
+
16
+ ##
17
+ # Returns the id of the entry, or its url if no id is present, as some formats don't support it
18
+ def id
19
+ @id || @url
7
20
  end
8
21
 
22
+ ##
23
+ # Writer for published. By default, we keep the "oldest" publish time found.
9
24
  def published=(val)
10
- @published = parse_datetime(val)
25
+ parsed = parse_datetime(val)
26
+ @published = parsed if !@published || parsed < @published
27
+ end
28
+
29
+ ##
30
+ # Writer for updated. By default, we keep the most recent update time found.
31
+ def updated=(val)
32
+ parsed = parse_datetime(val)
33
+ @updated = parsed if !@updated || parsed > @updated
11
34
  end
12
35
 
13
36
  def sanitize!
@@ -22,6 +22,14 @@ module Feedzirra
22
22
 
23
23
  element :pubDate, :as => :published
24
24
  element :"dc:date", :as => :published
25
+ element :"dc:Date", :as => :published
26
+ element :"dcterms:created", :as => :published
27
+
28
+
29
+ element :"dcterms:modified", :as => :updated
30
+ element :issued, :as => :published
25
31
  elements :category, :as => :categories
32
+
33
+ element :guid, :as => :id
26
34
  end
27
35
  end
data/lib/feedzirra.rb CHANGED
@@ -32,5 +32,5 @@ require 'feedzirra/atom'
32
32
  require 'feedzirra/atom_feed_burner'
33
33
 
34
34
  module Feedzirra
35
- VERSION = "0.0.6"
35
+ VERSION = "0.0.7"
36
36
  end
@@ -34,4 +34,9 @@ describe Feedzirra::AtomEntry do
34
34
  it "should parse the categories" do
35
35
  @entry.categories.should == ['Turkey', 'Seattle']
36
36
  end
37
+
38
+ it "should parse the updated date" do
39
+ @entry.updated.to_s.should == "Fri Jan 16 18:21:00 UTC 2009"
40
+ end
41
+
37
42
  end
@@ -523,7 +523,7 @@ describe Feedzirra::Feed do
523
523
 
524
524
  it 'should deflate the response body using inflate if the Content-Encoding: is deflate' do
525
525
  @curl_easy.stub!(:header_str).and_return('Content-Encoding: deflate')
526
- Zlib::Deflate.should_receive(:inflate).with(@curl_easy.body_str)
526
+ Zlib::Inflate.should_receive(:inflate).with(@curl_easy.body_str)
527
527
  Feedzirra::Feed.decode_content(@curl_easy)
528
528
  end
529
529
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pauldix-feedzirra
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Dix
data/lib/feedzirra/rdf.rb DELETED
@@ -1,23 +0,0 @@
1
- module Feedzirra
2
- # == Summary
3
- # Parser for dealing with RDF feeds.
4
- #
5
- # == Attributes
6
- # * title
7
- # * feed_url
8
- # * url
9
- # * entries
10
- class RDF
11
- include SAXMachine
12
- include FeedUtilities
13
- element :title
14
- element :link, :as => :url
15
- elements :item, :as => :entries, :class => RDFEntry
16
-
17
- attr_accessor :feed_url
18
-
19
- def self.able_to_parse?(xml) #:nodoc:
20
- xml =~ /(rdf\:RDF)|(#{Regexp.escape("http://purl.org/rss/1.0")})|(rss version\=\"0\.9.?\")/ || false
21
- end
22
- end
23
- end
@@ -1,22 +0,0 @@
1
- module Feedzirra
2
- # == Summary
3
- # Parser for dealing with RDF feed entries.
4
- #
5
- # == Attributes
6
- # * title
7
- # * url
8
- # * author
9
- # * content
10
- # * summary
11
- # * published
12
- class RDFEntry
13
- include SAXMachine
14
- include FeedEntryUtilities
15
- element :title
16
- element :link, :as => :url
17
- element :"dc:creator", :as => :author
18
- element :"content:encoded", :as => :content
19
- element :description, :as => :summary
20
- element :"dc:date", :as => :published
21
- end
22
- end
@@ -1,33 +0,0 @@
1
- require File.dirname(__FILE__) + '/../spec_helper'
2
-
3
- describe Feedzirra::RDFEntry do
4
- before(:each) do
5
- # I don't really like doing it this way because these unit test should only rely on AtomEntry,
6
- # but this is actually how it should work. You would never just pass entry xml straight to the AtomEnry
7
- @entry = Feedzirra::RDF.parse(sample_rdf_feed).entries.first
8
- end
9
-
10
- it "should parse the title" do
11
- @entry.title.should == "Chrome, V8 and Strongtalk"
12
- end
13
-
14
- it "should parse the url" do
15
- @entry.url.should == "http://www.avibryant.com/2008/09/chrome-v8-and-s.html"
16
- end
17
-
18
- it "should parse the author" do
19
- @entry.author.should == "Avi"
20
- end
21
-
22
- it "should parse the content" do
23
- @entry.content.should == sample_rdf_entry_content
24
- end
25
-
26
- it "should provide a summary" do
27
- @entry.summary.should == "There's lots to like about Google's new web browser, Chrome, which was released today. When I read the awesome comic strip introduction yesterday, however, the thing that stood out most for me was in very small type: the name Lars..."
28
- end
29
-
30
- it "should parse the published date" do
31
- @entry.published.to_s.should == "Tue Sep 02 19:50:07 UTC 2008"
32
- end
33
- end
@@ -1,37 +0,0 @@
1
- require File.dirname(__FILE__) + '/../spec_helper'
2
-
3
- describe Feedzirra::RDF do
4
- describe "#will_parse?" do
5
- it "should return true for an rdf feed" do
6
- Feedzirra::RDF.should be_able_to_parse(sample_rdf_feed)
7
- end
8
-
9
- it "should return false for an atom feed" do
10
- Feedzirra::RDF.should_not be_able_to_parse(sample_atom_feed)
11
- end
12
- end
13
-
14
- describe "parsing" do
15
- before(:each) do
16
- @feed = Feedzirra::RDF.parse(sample_rdf_feed)
17
- end
18
-
19
- it "should parse the title" do
20
- @feed.title.should == "HREF Considered Harmful"
21
- end
22
-
23
- it "should parse the url" do
24
- @feed.url.should == "http://www.avibryant.com/"
25
- end
26
-
27
- # rdf doesn't actually specify the feed_url. This should be set in the fetcher.
28
- it "should provide an accessor for the feed_url" do
29
- @feed.respond_to?(:feed_url).should == true
30
- @feed.respond_to?(:feed_url=).should == true
31
- end
32
-
33
- it "should parse entries" do
34
- @feed.entries.size.should == 10
35
- end
36
- end
37
- end