feedzirra 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -46,7 +46,7 @@ module Feedzirra
46
46
  # === Returns
47
47
  # An array of class names.
48
48
  def self.feed_classes
49
- @feed_classes ||= [Feedzirra::Parser::RSSFeedBurner, Feedzirra::Parser::RSS, Feedzirra::Parser::AtomFeedBurner, Feedzirra::Parser::Atom]
49
+ @feed_classes ||= [Feedzirra::Parser::RSSFeedBurner, Feedzirra::Parser::RSS, Feedzirra::Parser::GoogleDocsAtom, Feedzirra::Parser::AtomFeedBurner, Feedzirra::Parser::Atom, Feedzirra::Parser::ITunesRSS]
50
50
  end
51
51
 
52
52
  # Makes all registered feed types look for the passed-in element to parse.
@@ -279,7 +279,7 @@ module Feedzirra
279
279
 
280
280
  if klass
281
281
  begin
282
- feed = klass.parse(xml, Proc.new{|message| puts "Error while parsing [#{url}] #{message}" })
282
+ feed = klass.parse(xml, Proc.new{|message| warn "Error while parsing [#{url}] #{message}" })
283
283
  feed.feed_url = c.last_effective_url
284
284
  feed.etag = etag_from_header(c.header_str)
285
285
  feed.last_modified = last_modified_from_header(c.header_str)
@@ -332,7 +332,7 @@ module Feedzirra
332
332
  curl.on_success do |c|
333
333
  begin
334
334
  add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
335
- updated_feed = Feed.parse(c.body_str){ |message| puts "Error while parsing [#{feed.feed_url}] #{message}" }
335
+ updated_feed = Feed.parse(c.body_str){ |message| warn "Error while parsing [#{feed.feed_url}] #{message}" }
336
336
  updated_feed.feed_url = c.last_effective_url
337
337
  updated_feed.etag = etag_from_header(c.header_str)
338
338
  updated_feed.last_modified = last_modified_from_header(c.header_str)
@@ -381,4 +381,4 @@ module Feedzirra
381
381
  Time.parse($1) if $1
382
382
  end
383
383
  end
384
- end
384
+ end
@@ -11,7 +11,7 @@ module Feedzirra
11
11
  begin
12
12
  DateTime.parse(string).feed_utils_to_gm_time
13
13
  rescue
14
- puts "DATE CAN'T BE PARSED: [#{string}]"
14
+ warn "Failed to parse date #{string.inspect}"
15
15
  nil
16
16
  end
17
17
  end
@@ -50,23 +50,12 @@ module Feedzirra
50
50
  private
51
51
 
52
52
  def find_new_entries_for(feed)
53
- # this implementation is a hack, which is why it's so ugly.
54
- # it's to get around the fact that not all feeds have a published date.
55
- # however, they're always ordered with the newest one first.
56
- # So we go through the entries just parsed and insert each one as a new entry
57
- # until we get to one that has the same url as the the newest for the feed
58
- return feed.entries if self.entries.length == 0
59
- latest_entry = self.entries.first
60
- found_new_entries = []
61
- feed.entries.each do |entry|
62
- break if entry.url == latest_entry.url
63
- found_new_entries << entry
64
- end
65
- found_new_entries
53
+ # this algorithm does not optimize based on publication date, but always finds new entries
54
+ feed.entries.reject {|entry| self.entries.any? {|e| e.url == entry.url} }
66
55
  end
67
56
 
68
57
  def existing_entry?(test_entry)
69
58
  entries.any? { |entry| entry.url == test_entry.url }
70
59
  end
71
60
  end
72
- end
61
+ end
@@ -26,4 +26,4 @@ module Feedzirra
26
26
  end
27
27
  end
28
28
 
29
- end
29
+ end
@@ -0,0 +1,28 @@
1
+ require File.expand_path('./atom', File.dirname(__FILE__))
2
+
3
+ module Feedzirra
4
+ module Parser
5
+ class GoogleDocsAtom
6
+ include SAXMachine
7
+ include FeedUtilities
8
+ element :title
9
+ element :subtitle, :as => :description
10
+ element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
11
+ element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
12
+ elements :link, :as => :links, :value => :href
13
+ elements :entry, :as => :entries, :class => GoogleDocsAtomEntry
14
+
15
+ def url
16
+ @url ||= links.first
17
+ end
18
+
19
+ def self.able_to_parse?(xml) #:nodoc:
20
+ %r{<id>https?://docs.google.com/.*\</id\>} =~ xml
21
+ end
22
+
23
+ def feed_url
24
+ @feed_url ||= links.first
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,29 @@
1
+ module Feedzirra
2
+ module Parser
3
+ class GoogleDocsAtomEntry # Entry parser used by GoogleDocsAtom; adds the docs:* elements to the usual Atom entry fields.
4
+ include SAXMachine
5
+ include FeedEntryUtilities
6
+
7
+ element :title
8
+ element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
9
+ element :name, :as => :author
10
+ element :content
11
+ element :summary
12
+ element :published
13
+ element :id, :as => :entry_id
14
+ element :created, :as => :published # alternate source element mapped onto the same published attribute
15
+ element :issued, :as => :published # alternate source element mapped onto the same published attribute
16
+ element :updated
17
+ element :modified, :as => :updated # alternate source element mapped onto the same updated attribute
18
+ elements :category, :as => :categories, :value => :term
19
+ elements :link, :as => :links, :value => :href # presumably collects the href of every <link>; used as the fallback in url below
20
+ element :"docs:md5Checksum", :as => :checksum # Google Docs specific element, exposed as checksum
21
+ element :"docs:filename", :as => :original_filename # Google Docs specific element, exposed as original_filename
22
+ element :"docs:suggestedFilename", :as => :suggested_filename # Google Docs specific element, exposed as suggested_filename
23
+
24
+ def url # Returns @url, defaulting it to the first collected link when it is unset.
25
+ @url ||= links.first
26
+ end
27
+ end
28
+ end
29
+ end
@@ -9,6 +9,9 @@ module Feedzirra
9
9
  autoload :ITunesRSSItem, 'feedzirra/parser/itunes_rss_item'
10
10
  autoload :ITunesRSSOwner, 'feedzirra/parser/itunes_rss_owner'
11
11
 
12
+ autoload :GoogleDocsAtom, 'feedzirra/parser/google_docs_atom'
13
+ autoload :GoogleDocsAtomEntry, 'feedzirra/parser/google_docs_atom_entry'
14
+
12
15
  autoload :Atom, 'feedzirra/parser/atom'
13
16
  autoload :AtomEntry, 'feedzirra/parser/atom_entry'
14
17
  autoload :AtomFeedBurner, 'feedzirra/parser/atom_feed_burner'
@@ -1,3 +1,3 @@
1
1
  module Feedzirra
2
- VERSION = '0.1.2'
2
+ VERSION = '0.1.3'
3
3
  end
@@ -97,6 +97,10 @@ describe Feedzirra::Feed do
97
97
  end
98
98
 
99
99
  describe "#determine_feed_parser_for_xml" do
100
+ it 'should return the Feedzirra::Parser::GoogleDocsAtom calss for a Google Docs atom feed' do
101
+ Feedzirra::Feed.determine_feed_parser_for_xml(sample_google_docs_list_feed).should == Feedzirra::Parser::GoogleDocsAtom
102
+ end
103
+
100
104
  it "should return the Feedzirra::Parser::Atom class for an atom feed" do
101
105
  Feedzirra::Feed.determine_feed_parser_for_xml(sample_atom_feed).should == Feedzirra::Parser::Atom
102
106
  end
@@ -590,4 +594,4 @@ describe Feedzirra::Feed do
590
594
  it "should return an return an array of feed objects if multiple feeds are passed in"
591
595
  end
592
596
  end
593
- end
597
+ end
@@ -0,0 +1,22 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::GoogleDocsAtomEntry do # Checks the docs:* custom elements on the first entry of the Google Docs sample feed.
4
+ describe 'parsing' do
5
+ before do
6
+ @feed = Feedzirra::Parser::GoogleDocsAtom.parse(sample_google_docs_list_feed)
7
+ @entry = @feed.entries.first # all examples below inspect only the first entry
8
+ end
9
+
10
+ it 'should have the custom checksum element' do
11
+ @entry.checksum.should eql '2b01142f7481c7b056c4b410d28f33cf' # expected value for the sample fixture's first entry
12
+ end
13
+
14
+ it 'should have the custom filename element' do
15
+ @entry.original_filename.should eql "MyFile.pdf"
16
+ end
17
+
18
+ it 'should have the custom suggested filename element' do
19
+ @entry.suggested_filename.should eql "TaxDocument.pdf"
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,31 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::GoogleDocsAtom do # Covers feed detection (able_to_parse?) and basic parsing of the Google Docs sample feed.
4
+ describe '.able_to_parser?' do # NOTE(review): label is misspelled; the examples exercise able_to_parse? via the be_able_to_parse matcher
5
+ it 'should return true for Google Docs feed' do
6
+ Feedzirra::Parser::GoogleDocsAtom.should be_able_to_parse(sample_google_docs_list_feed)
7
+ end
8
+
9
+ it 'should not be able to parse another Atom feed' do
10
+ Feedzirra::Parser::GoogleDocsAtom.should_not be_able_to_parse(sample_atom_feed)
11
+ end
12
+ end
13
+
14
+ describe 'parsing' do
15
+ before do
16
+ @feed = Feedzirra::Parser::GoogleDocsAtom.parse(sample_google_docs_list_feed)
17
+ end
18
+
19
+ it 'should return a bunch of objects' do
20
+ @feed.entries.should_not be_empty
21
+ end
22
+
23
+ it 'should populate a title, interhited from the Atom entry' do # NOTE(review): description has a typo and checks the feed-level title, not an entry's
24
+ @feed.title.should_not be_nil
25
+ end
26
+
27
+ it 'should return a bunch of entries of type GoogleDocsAtomEntry' do
28
+ @feed.entries.first.should be_a Feedzirra::Parser::GoogleDocsAtomEntry
29
+ end
30
+ end
31
+ end
data/spec/spec_helper.rb CHANGED
@@ -64,6 +64,10 @@ end
64
64
  def sample_wfw_feed
65
65
  load_sample("PaulDixExplainsNothingWFW.xml")
66
66
  end
67
+
68
+ def sample_google_docs_list_feed # Spec helper: returns whatever load_sample yields for the Google Docs list fixture.
69
+ load_sample("GoogleDocsList.xml")
70
+ end
67
71
 
68
72
  # http://eigenclass.org/hiki/Changes+in+Ruby+1.9#l156
69
73
  # Default Time.to_s changed in 1.9, monkeypatching it back
@@ -71,4 +75,4 @@ class Time
71
75
  def to_s
72
76
  strftime("%a %b %d %H:%M:%S %Z %Y")
73
77
  end
74
- end
78
+ end
metadata CHANGED
@@ -1,181 +1,134 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: feedzirra
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 1
8
- - 2
9
- version: 0.1.2
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.3
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Paul Dix
13
9
  - Julien Kirch
14
10
  autorequire:
15
11
  bindir: bin
16
12
  cert_chain: []
17
-
18
- date: 2011-09-30 00:00:00 -04:00
19
- default_executable:
20
- dependencies:
21
- - !ruby/object:Gem::Dependency
13
+ date: 2011-09-30 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
22
16
  name: nokogiri
23
- prerelease: false
24
- requirement: &id001 !ruby/object:Gem::Requirement
17
+ requirement: &70245154994900 !ruby/object:Gem::Requirement
25
18
  none: false
26
- requirements:
27
- - - ">="
28
- - !ruby/object:Gem::Version
29
- segments:
30
- - 1
31
- - 4
32
- - 4
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
33
22
  version: 1.4.4
34
23
  type: :runtime
35
- version_requirements: *id001
36
- - !ruby/object:Gem::Dependency
37
- name: sax-machine
38
24
  prerelease: false
39
- requirement: &id002 !ruby/object:Gem::Requirement
25
+ version_requirements: *70245154994900
26
+ - !ruby/object:Gem::Dependency
27
+ name: sax-machine
28
+ requirement: &70245154994260 !ruby/object:Gem::Requirement
40
29
  none: false
41
- requirements:
30
+ requirements:
42
31
  - - ~>
43
- - !ruby/object:Gem::Version
44
- segments:
45
- - 0
46
- - 1
47
- - 0
32
+ - !ruby/object:Gem::Version
48
33
  version: 0.1.0
49
34
  type: :runtime
50
- version_requirements: *id002
51
- - !ruby/object:Gem::Dependency
52
- name: curb
53
35
  prerelease: false
54
- requirement: &id003 !ruby/object:Gem::Requirement
36
+ version_requirements: *70245154994260
37
+ - !ruby/object:Gem::Dependency
38
+ name: curb
39
+ requirement: &70245154993700 !ruby/object:Gem::Requirement
55
40
  none: false
56
- requirements:
41
+ requirements:
57
42
  - - ~>
58
- - !ruby/object:Gem::Version
59
- segments:
60
- - 0
61
- - 7
62
- - 15
43
+ - !ruby/object:Gem::Version
63
44
  version: 0.7.15
64
45
  type: :runtime
65
- version_requirements: *id003
66
- - !ruby/object:Gem::Dependency
67
- name: builder
68
46
  prerelease: false
69
- requirement: &id004 !ruby/object:Gem::Requirement
47
+ version_requirements: *70245154993700
48
+ - !ruby/object:Gem::Dependency
49
+ name: builder
50
+ requirement: &70245154993160 !ruby/object:Gem::Requirement
70
51
  none: false
71
- requirements:
72
- - - ">="
73
- - !ruby/object:Gem::Version
74
- segments:
75
- - 2
76
- - 1
77
- - 2
52
+ requirements:
53
+ - - ! '>='
54
+ - !ruby/object:Gem::Version
78
55
  version: 2.1.2
79
56
  type: :runtime
80
- version_requirements: *id004
81
- - !ruby/object:Gem::Dependency
82
- name: activesupport
83
57
  prerelease: false
84
- requirement: &id005 !ruby/object:Gem::Requirement
58
+ version_requirements: *70245154993160
59
+ - !ruby/object:Gem::Dependency
60
+ name: activesupport
61
+ requirement: &70245154845640 !ruby/object:Gem::Requirement
85
62
  none: false
86
- requirements:
63
+ requirements:
87
64
  - - ~>
88
- - !ruby/object:Gem::Version
89
- segments:
90
- - 3
91
- - 0
92
- - 8
93
- version: 3.0.8
65
+ - !ruby/object:Gem::Version
66
+ version: 3.1.1
94
67
  type: :runtime
95
- version_requirements: *id005
96
- - !ruby/object:Gem::Dependency
97
- name: loofah
98
68
  prerelease: false
99
- requirement: &id006 !ruby/object:Gem::Requirement
69
+ version_requirements: *70245154845640
70
+ - !ruby/object:Gem::Dependency
71
+ name: loofah
72
+ requirement: &70245154844980 !ruby/object:Gem::Requirement
100
73
  none: false
101
- requirements:
74
+ requirements:
102
75
  - - ~>
103
- - !ruby/object:Gem::Version
104
- segments:
105
- - 1
106
- - 2
107
- - 0
76
+ - !ruby/object:Gem::Version
108
77
  version: 1.2.0
109
78
  type: :runtime
110
- version_requirements: *id006
111
- - !ruby/object:Gem::Dependency
112
- name: rdoc
113
79
  prerelease: false
114
- requirement: &id007 !ruby/object:Gem::Requirement
80
+ version_requirements: *70245154844980
81
+ - !ruby/object:Gem::Dependency
82
+ name: rdoc
83
+ requirement: &70245154844420 !ruby/object:Gem::Requirement
115
84
  none: false
116
- requirements:
85
+ requirements:
117
86
  - - ~>
118
- - !ruby/object:Gem::Version
119
- segments:
120
- - 3
121
- - 8
122
- version: "3.8"
87
+ - !ruby/object:Gem::Version
88
+ version: '3.8'
123
89
  type: :runtime
124
- version_requirements: *id007
125
- - !ruby/object:Gem::Dependency
126
- name: rake
127
90
  prerelease: false
128
- requirement: &id008 !ruby/object:Gem::Requirement
91
+ version_requirements: *70245154844420
92
+ - !ruby/object:Gem::Dependency
93
+ name: rake
94
+ requirement: &70245154843940 !ruby/object:Gem::Requirement
129
95
  none: false
130
- requirements:
131
- - - ">="
132
- - !ruby/object:Gem::Version
133
- segments:
134
- - 0
135
- - 8
136
- - 7
96
+ requirements:
97
+ - - ! '>='
98
+ - !ruby/object:Gem::Version
137
99
  version: 0.8.7
138
100
  type: :runtime
139
- version_requirements: *id008
140
- - !ruby/object:Gem::Dependency
141
- name: i18n
142
101
  prerelease: false
143
- requirement: &id009 !ruby/object:Gem::Requirement
102
+ version_requirements: *70245154843940
103
+ - !ruby/object:Gem::Dependency
104
+ name: i18n
105
+ requirement: &70245154843440 !ruby/object:Gem::Requirement
144
106
  none: false
145
- requirements:
146
- - - ">="
147
- - !ruby/object:Gem::Version
148
- segments:
149
- - 0
150
- - 5
151
- - 0
107
+ requirements:
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
152
110
  version: 0.5.0
153
111
  type: :runtime
154
- version_requirements: *id009
155
- - !ruby/object:Gem::Dependency
156
- name: rspec
157
112
  prerelease: false
158
- requirement: &id010 !ruby/object:Gem::Requirement
113
+ version_requirements: *70245154843440
114
+ - !ruby/object:Gem::Dependency
115
+ name: rspec
116
+ requirement: &70245154842920 !ruby/object:Gem::Requirement
159
117
  none: false
160
- requirements:
118
+ requirements:
161
119
  - - ~>
162
- - !ruby/object:Gem::Version
163
- segments:
164
- - 2
165
- - 6
166
- - 0
120
+ - !ruby/object:Gem::Version
167
121
  version: 2.6.0
168
122
  type: :development
169
- version_requirements: *id010
170
- description: "A feed fetching and parsing library that treats the internet like Godzilla treats Japan: it dominates and eats all."
123
+ prerelease: false
124
+ version_requirements: *70245154842920
125
+ description: ! 'A feed fetching and parsing library that treats the internet like
126
+ Godzilla treats Japan: it dominates and eats all.'
171
127
  email: feedzirra@googlegroups.com
172
128
  executables: []
173
-
174
129
  extensions: []
175
-
176
130
  extra_rdoc_files: []
177
-
178
- files:
131
+ files:
179
132
  - lib/feedzirra/core_ext/date.rb
180
133
  - lib/feedzirra/core_ext/string.rb
181
134
  - lib/feedzirra/core_ext.rb
@@ -186,6 +139,8 @@ files:
186
139
  - lib/feedzirra/parser/atom_entry.rb
187
140
  - lib/feedzirra/parser/atom_feed_burner.rb
188
141
  - lib/feedzirra/parser/atom_feed_burner_entry.rb
142
+ - lib/feedzirra/parser/google_docs_atom.rb
143
+ - lib/feedzirra/parser/google_docs_atom_entry.rb
189
144
  - lib/feedzirra/parser/itunes_rss.rb
190
145
  - lib/feedzirra/parser/itunes_rss_item.rb
191
146
  - lib/feedzirra/parser/itunes_rss_owner.rb
@@ -208,6 +163,8 @@ files:
208
163
  - spec/feedzirra/parser/atom_feed_burner_entry_spec.rb
209
164
  - spec/feedzirra/parser/atom_feed_burner_spec.rb
210
165
  - spec/feedzirra/parser/atom_spec.rb
166
+ - spec/feedzirra/parser/google_docs_atom_entry_spec.rb
167
+ - spec/feedzirra/parser/google_docs_atom_spec.rb
211
168
  - spec/feedzirra/parser/itunes_rss_item_spec.rb
212
169
  - spec/feedzirra/parser/itunes_rss_owner_spec.rb
213
170
  - spec/feedzirra/parser/itunes_rss_spec.rb
@@ -220,39 +177,31 @@ files:
220
177
  - README.rdoc
221
178
  - Rakefile
222
179
  - .rspec
223
- has_rdoc: true
224
180
  homepage: http://github.com/pauldix/feedzirra
225
181
  licenses: []
226
-
227
182
  post_install_message:
228
183
  rdoc_options: []
229
-
230
- require_paths:
184
+ require_paths:
231
185
  - lib
232
- required_ruby_version: !ruby/object:Gem::Requirement
186
+ required_ruby_version: !ruby/object:Gem::Requirement
233
187
  none: false
234
- requirements:
235
- - - ">="
236
- - !ruby/object:Gem::Version
237
- segments:
238
- - 0
239
- version: "0"
240
- required_rubygems_version: !ruby/object:Gem::Requirement
188
+ requirements:
189
+ - - ! '>='
190
+ - !ruby/object:Gem::Version
191
+ version: '0'
192
+ required_rubygems_version: !ruby/object:Gem::Requirement
241
193
  none: false
242
- requirements:
243
- - - ">="
244
- - !ruby/object:Gem::Version
245
- segments:
246
- - 0
247
- version: "0"
194
+ requirements:
195
+ - - ! '>='
196
+ - !ruby/object:Gem::Version
197
+ version: '0'
248
198
  requirements: []
249
-
250
199
  rubyforge_project:
251
- rubygems_version: 1.3.7
200
+ rubygems_version: 1.8.17
252
201
  signing_key:
253
202
  specification_version: 3
254
203
  summary: A feed fetching and parsing library
255
- test_files:
204
+ test_files:
256
205
  - spec/benchmarks/feed_benchmarks.rb
257
206
  - spec/benchmarks/feedzirra_benchmarks.rb
258
207
  - spec/benchmarks/fetching_benchmarks.rb
@@ -265,6 +214,8 @@ test_files:
265
214
  - spec/feedzirra/parser/atom_feed_burner_entry_spec.rb
266
215
  - spec/feedzirra/parser/atom_feed_burner_spec.rb
267
216
  - spec/feedzirra/parser/atom_spec.rb
217
+ - spec/feedzirra/parser/google_docs_atom_entry_spec.rb
218
+ - spec/feedzirra/parser/google_docs_atom_spec.rb
268
219
  - spec/feedzirra/parser/itunes_rss_item_spec.rb
269
220
  - spec/feedzirra/parser/itunes_rss_owner_spec.rb
270
221
  - spec/feedzirra/parser/itunes_rss_spec.rb