feedzirra 0.2.0.rc2 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/.travis.yml CHANGED
@@ -3,7 +3,6 @@ rvm:
3
3
  - 1.8.7
4
4
  - 1.9.2
5
5
  - 1.9.3
6
+ - 2.0.0
6
7
  - rbx-18mode
7
8
  - rbx-19mode
8
- notifications:
9
- irc: "irc.freenode.org#feedzirra"
data/Gemfile CHANGED
@@ -1,10 +1,9 @@
1
- source :rubygems
1
+ source 'https://rubygems.org/'
2
2
 
3
3
  gemspec
4
4
 
5
5
  group :development, :test do
6
6
  gem 'rake'
7
7
  gem 'guard-rspec'
8
- gem 'growl', :require => false
9
8
  gem 'simplecov', :require => false, :platforms => :mri_19
10
9
  end
data/Guardfile CHANGED
@@ -1,4 +1,4 @@
1
- guard 'rspec', :rvm => ['1.9.3@feedzirra', 'rbx-head@feedzirra'] do
1
+ guard 'rspec' do
2
2
  watch(%r{^spec/.+_spec\.rb$})
3
3
  watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
4
4
  watch('spec/spec_helper.rb') { "spec" }
data/HISTORY.md CHANGED
@@ -1,3 +1,9 @@
1
+ # HEAD
2
+ * Use `Time.parse_safely` in `Feed.last_modified_from_header` [[#129](https://github.com/pauldix/feedzirra/pull/129)].
3
+ * Added image to the RSS Entry Parser [[#103](https://github.com/pauldix/feedzirra/pull/103)].
4
+ * Compatibility fixes for Ruby 2.0 [[#136](https://github.com/pauldix/feedzirra/pull/136)].
5
+ * Remove gorillib dependency [[#113](https://github.com/pauldix/feedzirra/pull/113)].
6
+
1
7
  # 0.2.0.rc2
2
8
  * Bump sax-machine to `v0.2.0.rc1`, fixes encoding issues [[#76](https://github.com/pauldix/feedzirra/issues/76)].
3
9
 
data/feedzirra.gemspec CHANGED
@@ -6,12 +6,11 @@ Gem::Specification.new do |s|
6
6
  s.version = Feedzirra::VERSION
7
7
 
8
8
  s.authors = ['Paul Dix', 'Julien Kirch', "Ezekiel Templin"]
9
- s.date = Date.today
10
9
  s.email = 'feedzirra@googlegroups.com'
11
10
  s.homepage = 'http://github.com/pauldix/feedzirra'
12
11
 
13
12
  s.summary = 'A feed fetching and parsing library'
14
- s.description = 'A feed fetching and parsing library that treats the internet like Godzilla treats Japan: it dominates and eats all.'
13
+ s.description = 'A library designed to retrieve and parse feeds as quickly as possible'
15
14
 
16
15
  s.files = `git ls-files`.split("\n")
17
16
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
@@ -19,11 +18,10 @@ Gem::Specification.new do |s|
19
18
 
20
19
  s.platform = Gem::Platform::RUBY
21
20
 
22
- s.add_dependency 'nokogiri', '~> 1.5.3'
23
- s.add_dependency 'sax-machine', '~> 0.2.0.rc1'
24
- s.add_dependency 'curb', '~> 0.8.0'
21
+ s.add_dependency 'nokogiri', '~> 1.6.0'
22
+ s.add_dependency 'sax-machine', '~> 0.2.1'
23
+ s.add_dependency 'curb', '~> 0.8.1'
25
24
  s.add_dependency 'loofah', '~> 1.2.1'
26
- s.add_dependency 'gorillib', '~> 0.1.9'
27
25
 
28
- s.add_development_dependency 'rspec', '~> 2.10.0'
26
+ s.add_development_dependency 'rspec', '~> 2.13.0'
29
27
  end
data/lib/feedzirra.rb CHANGED
@@ -3,7 +3,6 @@ require 'curb'
3
3
  require 'sax-machine'
4
4
  require 'loofah'
5
5
  require 'uri'
6
- require 'gorillib/datetime/parse'
7
6
 
8
7
  require 'feedzirra/core_ext'
9
8
  require 'feedzirra/version'
@@ -1,3 +1,3 @@
1
- Dir["#{File.dirname(__FILE__)}/core_ext/*.rb"].sort.each do |path|
2
- require "feedzirra/core_ext/#{File.basename(path, '.rb')}"
3
- end
1
+ require "feedzirra/core_ext/time"
2
+ require "feedzirra/core_ext/date"
3
+ require "feedzirra/core_ext/string"
@@ -0,0 +1,29 @@
1
+ require "time"
2
+ require "date"
3
+
4
+ class Time
5
+ # Parse a time string and convert it to UTC without raising errors.
6
+ # Parses a flattened 14-digit time (YYYYmmddHHMMSS) as UTC.
7
+ #
8
+ # === Parameters
9
+ # [dt<String or Time>] Time definition to be parsed.
10
+ #
11
+ # === Returns
12
+ # A Time instance in UTC or nil if there were errors while parsing.
13
+ def self.parse_safely(dt)
14
+ if dt
15
+ case
16
+ when dt.is_a?(Time)
17
+ dt.utc
18
+ when dt.respond_to?(:empty?) && dt.empty?
19
+ nil
20
+ when dt.to_s =~ /\A\d{14}\z/
21
+ parse("#{dt.to_s}Z", true)
22
+ else
23
+ parse(dt.to_s, true).utc
24
+ end
25
+ end
26
+ rescue StandardError
27
+ nil
28
+ end unless method_defined?(:parse_safely)
29
+ end
@@ -44,7 +44,7 @@ module Feedzirra
44
44
  # === Returns
45
45
  # An array of class names.
46
46
  def self.feed_classes
47
- @feed_classes ||= [Feedzirra::Parser::RSSFeedBurner, Feedzirra::Parser::RSS, Feedzirra::Parser::GoogleDocsAtom, Feedzirra::Parser::AtomFeedBurner, Feedzirra::Parser::Atom, Feedzirra::Parser::ITunesRSS]
47
+ @feed_classes ||= [Feedzirra::Parser::RSSFeedBurner, Feedzirra::Parser::GoogleDocsAtom, Feedzirra::Parser::AtomFeedBurner, Feedzirra::Parser::Atom, Feedzirra::Parser::ITunesRSS, Feedzirra::Parser::RSS]
48
48
  end
49
49
 
50
50
  # Makes all registered feeds types look for the passed in element to parse.
@@ -247,7 +247,7 @@ module Feedzirra
247
247
  end
248
248
 
249
249
  multi.perform
250
- responses.is_a?(Array)? responses.values : responses.values.first
250
+ feeds.is_a?(Array) ? responses : responses.values.first
251
251
  end
252
252
 
253
253
  # An abstraction for adding a feed by URL to the passed Curb::multi stack.
@@ -292,7 +292,19 @@ module Feedzirra
292
292
  options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
293
293
  end
294
294
  end
295
-
295
+
296
+ #
297
+ # trigger on_failure for 404s
298
+ #
299
+ curl.on_complete do |c|
300
+ add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
301
+ responses[url] = c.response_code
302
+
303
+ if c.response_code == 404 && options.has_key?(:on_failure)
304
+ options[:on_failure].call(url, c.response_code, c.header_str, c.body_str)
305
+ end
306
+ end
307
+
296
308
  curl.on_failure do |c, err|
297
309
  add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
298
310
  responses[url] = c.response_code
@@ -376,7 +388,7 @@ module Feedzirra
376
388
  # A Time object of the last modified date or nil if it cannot be found in the headers.
377
389
  def self.last_modified_from_header(header)
378
390
  header =~ /.*Last-Modified:\s(.*)\r/
379
- Time.parse($1) if $1
391
+ Time.parse_safely($1) if $1
380
392
  end
381
393
  end
382
394
  end
@@ -41,6 +41,7 @@ module Feedzirra
41
41
  self.author.sanitize! if self.author
42
42
  self.summary.sanitize! if self.summary
43
43
  self.content.sanitize! if self.content
44
+ self.image.sanitize! if self.image
44
45
  end
45
46
 
46
47
  alias_method :last_modified, :published
@@ -29,9 +29,9 @@ module Feedzirra
29
29
  self.entries.unshift(*self.new_entries)
30
30
 
31
31
  @updated = false
32
+
32
33
  UPDATABLE_ATTRIBUTES.each do |name|
33
- updated = update_attribute(feed, name)
34
- @updated ||= updated
34
+ @updated ||= update_attribute(feed, name)
35
35
  end
36
36
  end
37
37
 
@@ -40,6 +40,9 @@ module Feedzirra
40
40
 
41
41
  if old_value != new_value
42
42
  send("#{name}=", new_value)
43
+ true
44
+ else
45
+ false
43
46
  end
44
47
  end
45
48
 
@@ -54,19 +57,19 @@ module Feedzirra
54
57
  # it's to get around the fact that not all feeds have a published date.
55
58
  # however, they're always ordered with the newest one first.
56
59
  # So we go through the entries just parsed and insert each one as a new entry
57
- # until we get to one that has the same url as the the newest for the feed
60
+ # until we get to one that has the same id as the newest for the feed
58
61
  return feed.entries if self.entries.length == 0
59
62
  latest_entry = self.entries.first
60
63
  found_new_entries = []
61
64
  feed.entries.each do |entry|
62
- break if entry.url == latest_entry.url
65
+ break if entry.id == latest_entry.id
63
66
  found_new_entries << entry
64
67
  end
65
68
  found_new_entries
66
69
  end
67
70
 
68
71
  def existing_entry?(test_entry)
69
- entries.any? { |entry| entry.url == test_entry.url }
72
+ entries.any? { |entry| entry.id == test_entry.id }
70
73
  end
71
74
  end
72
- end
75
+ end
@@ -11,6 +11,10 @@ module Feedzirra
11
11
  element :name, :as => :author
12
12
  element :content
13
13
  element :summary
14
+
15
+ element :"media:content", :as => :image, :value => :url
16
+ element :enclosure, :as => :image, :value => :href
17
+
14
18
  element :published
15
19
  element :id, :as => :entry_id
16
20
  element :created, :as => :published
@@ -12,6 +12,10 @@ module Feedzirra
12
12
  element :"feedburner:origLink", :as => :url
13
13
  element :summary
14
14
  element :content
15
+
16
+ element :"media:content", :as => :image, :value => :url
17
+ element :enclosure, :as => :image, :value => :href
18
+
15
19
  element :published
16
20
  element :id, :as => :entry_id
17
21
  element :issued, :as => :published
@@ -13,6 +13,9 @@ module Feedzirra
13
13
  element :author, :as => :author
14
14
  element :"content:encoded", :as => :content
15
15
  element :description, :as => :summary
16
+
17
+ element :"media:content", :as => :image, :value => :url
18
+ element :enclosure, :as => :image, :value => :url
16
19
 
17
20
  element :pubDate, :as => :published
18
21
  element :pubdate, :as => :published
@@ -25,8 +28,7 @@ module Feedzirra
25
28
  element :issued, :as => :published
26
29
  elements :category, :as => :categories
27
30
 
28
- element :guid, :as => :entry_id
29
-
31
+ element :guid, :as => :entry_id
30
32
  end
31
33
 
32
34
  end
@@ -16,6 +16,9 @@ module Feedzirra
16
16
  element :"content:encoded", :as => :content
17
17
  element :description, :as => :summary
18
18
 
19
+ element :"media:content", :as => :image, :value => :url
20
+ element :enclosure, :as => :image, :value => :url
21
+
19
22
  element :pubDate, :as => :published
20
23
  element :pubdate, :as => :published
21
24
  element :"dc:date", :as => :published
@@ -1,3 +1,3 @@
1
1
  module Feedzirra
2
- VERSION = '0.2.0.rc2'
2
+ VERSION = '0.2.1'
3
3
  end
@@ -68,14 +68,10 @@ describe Feedzirra::Feed do
68
68
  feed.entries.size.should == 5
69
69
  end
70
70
 
71
- it "should parse an itunes feed as a standard RSS feed" do
71
+ it "should parse an itunes feed" do
72
72
  feed = Feedzirra::Feed.parse(sample_itunes_feed)
73
73
  feed.title.should == "All About Everything"
74
74
  feed.entries.first.published.should == Time.parse_safely("Wed, 15 Jun 2005 19:00:00 GMT")
75
-
76
- # Since the commit 621957879, iTunes feeds will be parsed as standard RSS, so this
77
- # entry should now not have a method for itunes_author.
78
- feed.entries.first.should_not respond_to(:itunes_author)
79
75
  feed.entries.size.should == 3
80
76
  end
81
77
  end
@@ -122,7 +118,7 @@ describe Feedzirra::Feed do
122
118
  end
123
119
 
124
120
  it "should return a Feedzirra::Parser::RSS object for an itunes feed" do
125
- Feedzirra::Feed.determine_feed_parser_for_xml(sample_itunes_feed).should == Feedzirra::Parser::RSS
121
+ Feedzirra::Feed.determine_feed_parser_for_xml(sample_itunes_feed).should == Feedzirra::Parser::ITunesRSS
126
122
  end
127
123
 
128
124
  end
@@ -373,17 +369,17 @@ describe Feedzirra::Feed do
373
369
 
374
370
  describe 'on failure' do
375
371
  before(:each) do
376
- @headers = "HTTP/1.0 404 Not Found\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\n"
377
- @body = 'Page could not be found.'
372
+ @headers = "HTTP/1.0 500 Something Bad\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\n"
373
+ @body = 'Sorry, something broke'
378
374
 
379
- @easy_curl.stub!(:response_code).and_return(404)
375
+ @easy_curl.stub!(:response_code).and_return(500)
380
376
  @easy_curl.stub!(:header_str).and_return(@headers)
381
377
  @easy_curl.stub!(:body_str).and_return(@body)
382
378
  end
383
379
 
384
380
  it 'should call proc if :on_failure option is passed' do
385
381
  failure = lambda { |url, feed| }
386
- failure.should_receive(:call).with(@paul_feed[:url], 404, @headers, @body)
382
+ failure.should_receive(:call).with(@paul_feed[:url], 500, @headers, @body)
387
383
  Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, { :on_failure => failure })
388
384
  @easy_curl.on_failure.call(@easy_curl)
389
385
  end
@@ -393,6 +389,33 @@ describe Feedzirra::Feed do
393
389
  Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], responses, {})
394
390
  @easy_curl.on_failure.call(@easy_curl)
395
391
 
392
+ responses.length.should == 1
393
+ responses[@paul_feed[:url]].should == 500
394
+ end
395
+ end
396
+
397
+ describe 'on complete for 404s' do
398
+ before(:each) do
399
+ @headers = "HTTP/1.0 404 Not Found\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\n"
400
+ @body = 'Page could not be found.'
401
+
402
+ @easy_curl.stub!(:response_code).and_return(404)
403
+ @easy_curl.stub!(:header_str).and_return(@headers)
404
+ @easy_curl.stub!(:body_str).and_return(@body)
405
+ end
406
+
407
+ it 'should call proc if :on_failure option is passed' do
408
+ complete = lambda { |url| }
409
+ complete.should_receive(:call).with(@paul_feed[:url], 404, @headers, @body)
410
+ Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, { :on_failure => complete })
411
+ @easy_curl.on_complete.call(@easy_curl)
412
+ end
413
+
414
+ it 'should return the http code in the responses' do
415
+ responses = {}
416
+ Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], responses, {})
417
+ @easy_curl.on_complete.call(@easy_curl)
418
+
396
419
  responses.length.should == 1
397
420
  responses[@paul_feed[:url]].should == 404
398
421
  end
@@ -146,5 +146,48 @@ describe Feedzirra::FeedUtilities do
146
146
  @feed.entries.should include(@old_entry)
147
147
  end
148
148
  end
149
+
150
+ describe "changing the url of an existing entry" do
151
+ before(:each) do
152
+ # I'm using the Atom class when I know I should be using a different one. However, this update_from_feed
153
+ # method would only be called against a feed item.
154
+ @feed = Feedzirra::Parser::Atom.new
155
+ @feed.title = "A title"
156
+ @feed.url = "http://pauldix.net"
157
+ @feed.feed_url = "http://feeds.feedburner.com/PaulDixExplainsNothing"
158
+ @feed.updated = false
159
+ @updated_feed = @feed.dup
160
+
161
+ @old_entry = Feedzirra::Parser::AtomEntry.new
162
+ @old_entry.url = "http://pauldix.net/old.html"
163
+ @old_entry.published = (Time.now - 10).to_s
164
+
165
+ @entry = Feedzirra::Parser::AtomEntry.new
166
+ @entry.published = (Time.now + 10).to_s
167
+ @entry.entry_id = "entry_id"
168
+ @entry.url = "http://pauldix.net/entry.html"
169
+
170
+ # only difference is a changed url
171
+ @entry_changed_url = @entry.dup
172
+ @entry_changed_url.url = "http://pauldix.net/updated.html"
173
+
174
+ # entry with changed url must be first
175
+ @feed.entries << @entry
176
+ @feed.entries << @old_entry
177
+ @updated_feed.entries << @entry_changed_url
178
+ @updated_feed.entries << @old_entry
179
+ end
180
+
181
+ it "should not put the complete feed into new_entries" do
182
+ @feed.update_from_feed(@updated_feed)
183
+
184
+ @feed.new_entries.should_not include(@entry_changed_url)
185
+ @feed.new_entries.should_not include(@old_entry)
186
+
187
+ # don't return complete feed
188
+ @feed.new_entries.size.should == 0
189
+ @feed.new_entries.size.should_not == 2
190
+ end
191
+ end
149
192
  end
150
- end
193
+ end
metadata CHANGED
@@ -1,8 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedzirra
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0.rc2
5
- prerelease: 6
4
+ version: 0.2.1
5
+ prerelease:
6
6
  platform: ruby
7
7
  authors:
8
8
  - Paul Dix
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2012-06-04 00:00:00.000000000 Z
14
+ date: 2013-10-15 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: nokogiri
@@ -20,7 +20,7 @@ dependencies:
20
20
  requirements:
21
21
  - - ~>
22
22
  - !ruby/object:Gem::Version
23
- version: 1.5.3
23
+ version: 1.6.0
24
24
  type: :runtime
25
25
  prerelease: false
26
26
  version_requirements: !ruby/object:Gem::Requirement
@@ -28,7 +28,7 @@ dependencies:
28
28
  requirements:
29
29
  - - ~>
30
30
  - !ruby/object:Gem::Version
31
- version: 1.5.3
31
+ version: 1.6.0
32
32
  - !ruby/object:Gem::Dependency
33
33
  name: sax-machine
34
34
  requirement: !ruby/object:Gem::Requirement
@@ -36,7 +36,7 @@ dependencies:
36
36
  requirements:
37
37
  - - ~>
38
38
  - !ruby/object:Gem::Version
39
- version: 0.2.0.rc1
39
+ version: 0.2.1
40
40
  type: :runtime
41
41
  prerelease: false
42
42
  version_requirements: !ruby/object:Gem::Requirement
@@ -44,7 +44,7 @@ dependencies:
44
44
  requirements:
45
45
  - - ~>
46
46
  - !ruby/object:Gem::Version
47
- version: 0.2.0.rc1
47
+ version: 0.2.1
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: curb
50
50
  requirement: !ruby/object:Gem::Requirement
@@ -52,7 +52,7 @@ dependencies:
52
52
  requirements:
53
53
  - - ~>
54
54
  - !ruby/object:Gem::Version
55
- version: 0.8.0
55
+ version: 0.8.1
56
56
  type: :runtime
57
57
  prerelease: false
58
58
  version_requirements: !ruby/object:Gem::Requirement
@@ -60,7 +60,7 @@ dependencies:
60
60
  requirements:
61
61
  - - ~>
62
62
  - !ruby/object:Gem::Version
63
- version: 0.8.0
63
+ version: 0.8.1
64
64
  - !ruby/object:Gem::Dependency
65
65
  name: loofah
66
66
  requirement: !ruby/object:Gem::Requirement
@@ -77,22 +77,6 @@ dependencies:
77
77
  - - ~>
78
78
  - !ruby/object:Gem::Version
79
79
  version: 1.2.1
80
- - !ruby/object:Gem::Dependency
81
- name: gorillib
82
- requirement: !ruby/object:Gem::Requirement
83
- none: false
84
- requirements:
85
- - - ~>
86
- - !ruby/object:Gem::Version
87
- version: 0.1.9
88
- type: :runtime
89
- prerelease: false
90
- version_requirements: !ruby/object:Gem::Requirement
91
- none: false
92
- requirements:
93
- - - ~>
94
- - !ruby/object:Gem::Version
95
- version: 0.1.9
96
80
  - !ruby/object:Gem::Dependency
97
81
  name: rspec
98
82
  requirement: !ruby/object:Gem::Requirement
@@ -100,7 +84,7 @@ dependencies:
100
84
  requirements:
101
85
  - - ~>
102
86
  - !ruby/object:Gem::Version
103
- version: 2.10.0
87
+ version: 2.13.0
104
88
  type: :development
105
89
  prerelease: false
106
90
  version_requirements: !ruby/object:Gem::Requirement
@@ -108,9 +92,8 @@ dependencies:
108
92
  requirements:
109
93
  - - ~>
110
94
  - !ruby/object:Gem::Version
111
- version: 2.10.0
112
- description: ! 'A feed fetching and parsing library that treats the internet like
113
- Godzilla treats Japan: it dominates and eats all.'
95
+ version: 2.13.0
96
+ description: A library designed to retrieve and parse feeds as quickly as possible
114
97
  email: feedzirra@googlegroups.com
115
98
  executables: []
116
99
  extensions: []
@@ -129,6 +112,7 @@ files:
129
112
  - lib/feedzirra/core_ext.rb
130
113
  - lib/feedzirra/core_ext/date.rb
131
114
  - lib/feedzirra/core_ext/string.rb
115
+ - lib/feedzirra/core_ext/time.rb
132
116
  - lib/feedzirra/feed.rb
133
117
  - lib/feedzirra/feed_entry_utilities.rb
134
118
  - lib/feedzirra/feed_utilities.rb
@@ -202,18 +186,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
202
186
  - - ! '>='
203
187
  - !ruby/object:Gem::Version
204
188
  version: '0'
205
- segments:
206
- - 0
207
- hash: -1608049973569295975
208
189
  required_rubygems_version: !ruby/object:Gem::Requirement
209
190
  none: false
210
191
  requirements:
211
- - - ! '>'
192
+ - - ! '>='
212
193
  - !ruby/object:Gem::Version
213
- version: 1.3.1
194
+ version: '0'
214
195
  requirements: []
215
196
  rubyforge_project:
216
- rubygems_version: 1.8.24
197
+ rubygems_version: 1.8.23
217
198
  signing_key:
218
199
  specification_version: 3
219
200
  summary: A feed fetching and parsing library