pauldix-feedzirra 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,7 +19,11 @@ module Feedzirra
19
19
  element :content
20
20
  element :summary
21
21
  element :published
22
+ element :id
22
23
  element :created, :as => :published
24
+ element :issued, :as => :published
25
+ element :updated
26
+ element :modified, :as => :updated
23
27
  elements :category, :as => :categories, :value => :term
24
28
  end
25
29
  end
@@ -20,6 +20,11 @@ module Feedzirra
20
20
  element :summary
21
21
  element :content
22
22
  element :published
23
+ element :id
24
+ element :issued, :as => :published
25
+ element :created, :as => :published
26
+ element :updated
27
+ element :modified, :as => :updated
23
28
  elements :category, :as => :categories, :value => :term
24
29
  end
25
30
  end
@@ -153,15 +153,20 @@ module Feedzirra
153
153
  # [curl_request<Curl::Easy>] The Curl::Easy response object from the request.
154
154
  # === Returns
155
155
  # A decoded string of XML.
156
- def self.decode_content(curl_request)
157
- if curl_request.header_str.match(/Content-Encoding: gzip/)
158
- gz = Zlib::GzipReader.new(StringIO.new(curl_request.body_str))
159
- xml = gz.read
160
- gz.close
161
- elsif curl_request.header_str.match(/Content-Encoding: deflate/)
162
- xml = Zlib::Deflate.inflate(curl_request.body_str)
156
+ def self.decode_content(c)
157
+ if c.header_str.match(/Content-Encoding: gzip/)
158
+ begin
159
+ gz = Zlib::GzipReader.new(StringIO.new(c.body_str))
160
+ xml = gz.read
161
+ gz.close
162
+ rescue Zlib::GzipFile::Error
163
+ # Maybe this is not gzipped?
164
+ xml = c.body_str
165
+ end
166
+ elsif c.header_str.match(/Content-Encoding: deflate/)
167
+ xml = Zlib::Inflate.inflate(c.body_str)
163
168
  else
164
- xml = curl_request.body_str
169
+ xml = c.body_str
165
170
  end
166
171
 
167
172
  xml
@@ -228,7 +233,9 @@ module Feedzirra
228
233
  responses[url] = feed
229
234
  options[:on_success].call(url, feed) if options.has_key?(:on_success)
230
235
  else
231
- raise NoParserAvailable.new("Error determining parser for #{url} - #{c.last_effective_url}.")
236
+ # puts "Error determining parser for #{url} - #{c.last_effective_url}"
237
+ # raise NoParserAvailable.new("no valid parser for content.") (this would unfortunately fail the whole 'multi', so it's not really usable)
238
+ options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
232
239
  end
233
240
  end
234
241
 
@@ -1,13 +1,36 @@
1
1
  module Feedzirra
2
- module FeedEntryUtilities
3
- attr_reader :published
2
+ module FeedEntryUtilities
3
+ def published
4
+ @published || @updated
5
+ end
4
6
 
5
7
  def parse_datetime(string)
6
- DateTime.parse(string).feed_utils_to_gm_time
8
+ begin
9
+ DateTime.parse(string).feed_utils_to_gm_time
10
+ rescue
11
+ puts "DATE CAN'T BE PARSED: #{string}"
12
+ nil
13
+ end
14
+ end
15
+
16
+ ##
17
+ # Returns the id of the entry or its url if no id is present, as some formats don't support it
18
+ def id
19
+ @id || @url
7
20
  end
8
21
 
22
+ ##
23
+ # Writer for published. By default, we keep the "oldest" publish time found.
9
24
  def published=(val)
10
- @published = parse_datetime(val)
25
+ parsed = parse_datetime(val)
26
+ @published = parsed if !@published || parsed < @published
27
+ end
28
+
29
+ ##
30
+ # Writer for updated. By default, we keep the most recent update time found.
31
+ def updated=(val)
32
+ parsed = parse_datetime(val)
33
+ @updated = parsed if !@updated || parsed > @updated
11
34
  end
12
35
 
13
36
  def sanitize!
@@ -22,6 +22,14 @@ module Feedzirra
22
22
 
23
23
  element :pubDate, :as => :published
24
24
  element :"dc:date", :as => :published
25
+ element :"dc:Date", :as => :published
26
+ element :"dcterms:created", :as => :published
27
+
28
+
29
+ element :"dcterms:modified", :as => :updated
30
+ element :issued, :as => :published
25
31
  elements :category, :as => :categories
32
+
33
+ element :guid, :as => :id
26
34
  end
27
35
  end
data/lib/feedzirra.rb CHANGED
@@ -32,5 +32,5 @@ require 'feedzirra/atom'
32
32
  require 'feedzirra/atom_feed_burner'
33
33
 
34
34
  module Feedzirra
35
- VERSION = "0.0.6"
35
+ VERSION = "0.0.7"
36
36
  end
@@ -34,4 +34,9 @@ describe Feedzirra::AtomEntry do
34
34
  it "should parse the categories" do
35
35
  @entry.categories.should == ['Turkey', 'Seattle']
36
36
  end
37
+
38
+ it "should parse the updated date" do
39
+ @entry.updated.to_s.should == "Fri Jan 16 18:21:00 UTC 2009"
40
+ end
41
+
37
42
  end
@@ -523,7 +523,7 @@ describe Feedzirra::Feed do
523
523
 
524
524
  it 'should deflate the response body using inflate if the Content-Encoding: is deflate' do
525
525
  @curl_easy.stub!(:header_str).and_return('Content-Encoding: deflate')
526
- Zlib::Deflate.should_receive(:inflate).with(@curl_easy.body_str)
526
+ Zlib::Inflate.should_receive(:inflate).with(@curl_easy.body_str)
527
527
  Feedzirra::Feed.decode_content(@curl_easy)
528
528
  end
529
529
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pauldix-feedzirra
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Dix
data/lib/feedzirra/rdf.rb DELETED
@@ -1,23 +0,0 @@
1
- module Feedzirra
2
- # == Summary
3
- # Parser for dealing with RDF feeds.
4
- #
5
- # == Attributes
6
- # * title
7
- # * feed_url
8
- # * url
9
- # * entries
10
- class RDF
11
- include SAXMachine
12
- include FeedUtilities
13
- element :title
14
- element :link, :as => :url
15
- elements :item, :as => :entries, :class => RDFEntry
16
-
17
- attr_accessor :feed_url
18
-
19
- def self.able_to_parse?(xml) #:nodoc:
20
- xml =~ /(rdf\:RDF)|(#{Regexp.escape("http://purl.org/rss/1.0")})|(rss version\=\"0\.9.?\")/ || false
21
- end
22
- end
23
- end
@@ -1,22 +0,0 @@
1
- module Feedzirra
2
- # == Summary
3
- # Parser for dealing with RDF feed entries.
4
- #
5
- # == Attributes
6
- # * title
7
- # * url
8
- # * author
9
- # * content
10
- # * summary
11
- # * published
12
- class RDFEntry
13
- include SAXMachine
14
- include FeedEntryUtilities
15
- element :title
16
- element :link, :as => :url
17
- element :"dc:creator", :as => :author
18
- element :"content:encoded", :as => :content
19
- element :description, :as => :summary
20
- element :"dc:date", :as => :published
21
- end
22
- end
@@ -1,33 +0,0 @@
1
- require File.dirname(__FILE__) + '/../spec_helper'
2
-
3
- describe Feedzirra::RDFEntry do
4
- before(:each) do
5
- # I don't really like doing it this way because these unit test should only rely on AtomEntry,
6
- # but this is actually how it should work. You would never just pass entry xml straight to the AtomEnry
7
- @entry = Feedzirra::RDF.parse(sample_rdf_feed).entries.first
8
- end
9
-
10
- it "should parse the title" do
11
- @entry.title.should == "Chrome, V8 and Strongtalk"
12
- end
13
-
14
- it "should parse the url" do
15
- @entry.url.should == "http://www.avibryant.com/2008/09/chrome-v8-and-s.html"
16
- end
17
-
18
- it "should parse the author" do
19
- @entry.author.should == "Avi"
20
- end
21
-
22
- it "should parse the content" do
23
- @entry.content.should == sample_rdf_entry_content
24
- end
25
-
26
- it "should provide a summary" do
27
- @entry.summary.should == "There's lots to like about Google's new web browser, Chrome, which was released today. When I read the awesome comic strip introduction yesterday, however, the thing that stood out most for me was in very small type: the name Lars..."
28
- end
29
-
30
- it "should parse the published date" do
31
- @entry.published.to_s.should == "Tue Sep 02 19:50:07 UTC 2008"
32
- end
33
- end
@@ -1,37 +0,0 @@
1
- require File.dirname(__FILE__) + '/../spec_helper'
2
-
3
- describe Feedzirra::RDF do
4
- describe "#will_parse?" do
5
- it "should return true for an rdf feed" do
6
- Feedzirra::RDF.should be_able_to_parse(sample_rdf_feed)
7
- end
8
-
9
- it "should return false for an atom feed" do
10
- Feedzirra::RDF.should_not be_able_to_parse(sample_atom_feed)
11
- end
12
- end
13
-
14
- describe "parsing" do
15
- before(:each) do
16
- @feed = Feedzirra::RDF.parse(sample_rdf_feed)
17
- end
18
-
19
- it "should parse the title" do
20
- @feed.title.should == "HREF Considered Harmful"
21
- end
22
-
23
- it "should parse the url" do
24
- @feed.url.should == "http://www.avibryant.com/"
25
- end
26
-
27
- # rdf doesn't actually specify the feed_url. This should be set in the fetcher.
28
- it "should provide an accessor for the feed_url" do
29
- @feed.respond_to?(:feed_url).should == true
30
- @feed.respond_to?(:feed_url=).should == true
31
- end
32
-
33
- it "should parse entries" do
34
- @feed.entries.size.should == 10
35
- end
36
- end
37
- end