RubyGems - pauldix-feedzirra - Versions diffs - 0.0.6 → 0.0.7 - Mend

pauldix-feedzirra 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

data/lib/feedzirra/atom_entry.rb +4 -0
data/lib/feedzirra/atom_feed_burner_entry.rb +5 -0
data/lib/feedzirra/feed.rb +16 -9
data/lib/feedzirra/feed_entry_utilities.rb +27 -4
data/lib/feedzirra/rss_entry.rb +8 -0
data/lib/feedzirra.rb +1 -1
data/spec/feedzirra/atom_entry_spec.rb +5 -0
data/spec/feedzirra/feed_spec.rb +1 -1
metadata +1 -1
data/lib/feedzirra/rdf.rb +0 -23
data/lib/feedzirra/rdf_entry.rb +0 -22
data/spec/feedzirra/rdf_entry_spec.rb +0 -33
data/spec/feedzirra/rdf_spec.rb +0 -37

data/lib/feedzirra/atom_entry.rb CHANGED Viewed

@@ -19,7 +19,11 @@ module Feedzirra
     element :content
     element :summary
     element :published
+    element :id
     element :created, :as => :published
+    element :issued, :as => :published
+    element :updated
+    element :modified, :as => :updated
     elements :category, :as => :categories, :value => :term
   end
 end

data/lib/feedzirra/atom_feed_burner_entry.rb CHANGED Viewed

@@ -20,6 +20,11 @@ module Feedzirra
     element :summary
     element :content
     element :published
+    element :id
+    element :issued, :as => :published
+    element :created, :as => :published
+    element :updated
+    element :modified, :as => :updated
     elements :category, :as => :categories, :value => :term
   end
 end

data/lib/feedzirra/feed.rb CHANGED Viewed

@@ -153,15 +153,20 @@ module Feedzirra
     # [curl_request<Curl::Easy>] The Curl::Easy response object from the request.
     # === Returns
     # A decoded string of XML.
-    def self.decode_content(curl_request)
-      if curl_request.header_str.match(/Content-Encoding: gzip/)
-        gz =  Zlib::GzipReader.new(StringIO.new(curl_request.body_str))
-        xml = gz.read
-        gz.close
-      elsif curl_request.header_str.match(/Content-Encoding: deflate/)
-        xml = Zlib::Deflate.inflate(curl_request.body_str)
+    def self.decode_content(c)
+      if c.header_str.match(/Content-Encoding: gzip/)
+        begin
+          gz =  Zlib::GzipReader.new(StringIO.new(c.body_str))
+          xml = gz.read
+          gz.close
+        rescue Zlib::GzipFile::Error
+          # Maybe this is not gzipped?
+          xml = c.body_str
+        end
+      elsif c.header_str.match(/Content-Encoding: deflate/)
+        xml = Zlib::Inflate.inflate(c.body_str)
       else
-        xml = curl_request.body_str
+        xml = c.body_str
       end
       xml
@@ -228,7 +233,9 @@ module Feedzirra
             responses[url] = feed
             options[:on_success].call(url, feed) if options.has_key?(:on_success)
           else
-            raise NoParserAvailable.new("Error determining parser for #{url} - #{c.last_effective_url}.")
+            # puts "Error determining parser for #{url} - #{c.last_effective_url}"
+            # raise NoParserAvailable.new("no valid parser for content.") (this would unfortunately fail the whole 'multi', so it's not really usable)
+            options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
           end
         end

data/lib/feedzirra/feed_entry_utilities.rb CHANGED Viewed

@@ -1,13 +1,36 @@
 module Feedzirra
-  module FeedEntryUtilities
-    attr_reader :published
+  module FeedEntryUtilities
+    def published
+      @published || @updated
+    end
     def parse_datetime(string)
-      DateTime.parse(string).feed_utils_to_gm_time
+      begin
+        DateTime.parse(string).feed_utils_to_gm_time
+      rescue
+        puts "DATE CAN'T BE PARSED: #{string}"
+        nil
+      end
+    end
+    ##
+    # Returns the id of the entry or its url if no id is present, as some formats don't support it
+    def id
+      @id || @url
     end
+    ##
+    # Writer for published. By default, we keep the "oldest" publish time found.
     def published=(val)
-      @published = parse_datetime(val)
+      parsed = parse_datetime(val)
+      @published = parsed if !@published || parsed < @published
+    end
+    ##
+    # Writer for updated. By default, we keep the most recent update time found.
+    def updated=(val)
+      parsed = parse_datetime(val)
+      @updated = parsed if !@updated || parsed > @updated
     end
     def sanitize!

data/lib/feedzirra/rss_entry.rb CHANGED Viewed

@@ -22,6 +22,14 @@ module Feedzirra
     element :pubDate, :as => :published
     element :"dc:date", :as => :published
+    element :"dc:Date", :as => :published
+    element :"dcterms:created", :as => :published
+    element :"dcterms:modified", :as => :updated
+    element :issued, :as => :published
     elements :category, :as => :categories
+    element :guid, :as => :id
   end
 end

data/lib/feedzirra.rb CHANGED Viewed

@@ -32,5 +32,5 @@ require 'feedzirra/atom'
 require 'feedzirra/atom_feed_burner'
 module Feedzirra
-  VERSION = "0.0.6"
+  VERSION = "0.0.7"
 end

data/spec/feedzirra/atom_entry_spec.rb CHANGED Viewed

@@ -34,4 +34,9 @@ describe Feedzirra::AtomEntry do
   it "should parse the categories" do
     @entry.categories.should == ['Turkey', 'Seattle']
   end
+  it "should parse the updated date" do
+    @entry.updated.to_s.should == "Fri Jan 16 18:21:00 UTC 2009"
+  end
 end

data/spec/feedzirra/feed_spec.rb CHANGED Viewed

@@ -523,7 +523,7 @@ describe Feedzirra::Feed do
       it 'should deflate the response body using inflate if the Content-Encoding: is deflate' do
         @curl_easy.stub!(:header_str).and_return('Content-Encoding: deflate')
-        Zlib::Deflate.should_receive(:inflate).with(@curl_easy.body_str)
+        Zlib::Inflate.should_receive(:inflate).with(@curl_easy.body_str)
         Feedzirra::Feed.decode_content(@curl_easy)
       end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: pauldix-feedzirra
 version: !ruby/object:Gem::Version
-  version: 0.0.6
+  version: 0.0.7
 platform: ruby
 authors:
 - Paul Dix

data/lib/feedzirra/rdf.rb DELETED Viewed

@@ -1,23 +0,0 @@
-module Feedzirra
-  # == Summary
-  # Parser for dealing with RDF feeds.
-  #
-  # == Attributes
-  # * title
-  # * feed_url
-  # * url
-  # * entries
-  class RDF
-    include SAXMachine
-    include FeedUtilities
-    element :title
-    element :link, :as => :url
-    elements :item, :as => :entries, :class => RDFEntry
-    attr_accessor :feed_url
-    def self.able_to_parse?(xml) #:nodoc:
-      xml =~ /(rdf\:RDF)|(#{Regexp.escape("http://purl.org/rss/1.0")})|(rss version\=\"0\.9.?\")/ || false
-    end
-  end
-end

data/lib/feedzirra/rdf_entry.rb DELETED Viewed

@@ -1,22 +0,0 @@
-module Feedzirra
-  # == Summary
-  # Parser for dealing with RDF feed entries.
-  #
-  # == Attributes
-  # * title
-  # * url
-  # * author
-  # * content
-  # * summary
-  # * published
-  class RDFEntry
-    include SAXMachine
-    include FeedEntryUtilities
-    element :title
-    element :link, :as => :url
-    element :"dc:creator", :as => :author
-    element :"content:encoded", :as => :content
-    element :description, :as => :summary
-    element :"dc:date", :as => :published
-  end
-end

data/spec/feedzirra/rdf_entry_spec.rb DELETED Viewed

@@ -1,33 +0,0 @@
-require File.dirname(__FILE__) + '/../spec_helper'
-describe Feedzirra::RDFEntry do
-  before(:each) do
-    # I don't really like doing it this way because these unit tests should only rely on AtomEntry,
-    # but this is actually how it should work. You would never just pass entry xml straight to the AtomEntry
-    @entry = Feedzirra::RDF.parse(sample_rdf_feed).entries.first
-  end
-  it "should parse the title" do
-    @entry.title.should == "Chrome, V8 and Strongtalk"
-  end
-  it "should parse the url" do
-    @entry.url.should == "http://www.avibryant.com/2008/09/chrome-v8-and-s.html"
-  end
-  it "should parse the author" do
-    @entry.author.should == "Avi"
-  end
-  it "should parse the content" do
-    @entry.content.should == sample_rdf_entry_content
-  end
-  it "should provide a summary" do
-    @entry.summary.should == "There's lots to like about Google's new web browser, Chrome, which was released today. When I read the awesome comic strip introduction yesterday, however, the thing that stood out most for me was in very small type: the name Lars..."
-  end
-  it "should parse the published date" do
-    @entry.published.to_s.should == "Tue Sep 02 19:50:07 UTC 2008"
-  end
-end

data/spec/feedzirra/rdf_spec.rb DELETED Viewed

@@ -1,37 +0,0 @@
-require File.dirname(__FILE__) + '/../spec_helper'
-describe Feedzirra::RDF do
-  describe "#will_parse?" do
-    it "should return true for an rdf feed" do
-      Feedzirra::RDF.should be_able_to_parse(sample_rdf_feed)
-    end
-    it "should return false for an atom feed" do
-      Feedzirra::RDF.should_not be_able_to_parse(sample_atom_feed)
-    end
-  end
-  describe "parsing" do
-    before(:each) do
-      @feed = Feedzirra::RDF.parse(sample_rdf_feed)
-    end
-    it "should parse the title" do
-      @feed.title.should == "HREF Considered Harmful"
-    end
-    it "should parse the url" do
-      @feed.url.should == "http://www.avibryant.com/"
-    end
-    # rdf doesn't actually specify the feed_url. This should be set in the fetcher.
-    it "should provide an accessor for the feed_url" do
-      @feed.respond_to?(:feed_url).should == true
-      @feed.respond_to?(:feed_url=).should == true
-    end
-    it "should parse entries" do
-      @feed.entries.size.should == 10
-    end
-  end
-end