feedzirra 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -46,7 +46,7 @@ module Feedzirra
46
46
  # === Returns
47
47
  # An array of class names.
48
48
  def self.feed_classes
49
- @feed_classes ||= [Feedzirra::Parser::RSSFeedBurner, Feedzirra::Parser::RSS, Feedzirra::Parser::AtomFeedBurner, Feedzirra::Parser::Atom]
49
+ @feed_classes ||= [Feedzirra::Parser::RSSFeedBurner, Feedzirra::Parser::RSS, Feedzirra::Parser::GoogleDocsAtom, Feedzirra::Parser::AtomFeedBurner, Feedzirra::Parser::Atom, Feedzirra::Parser::ITunesRSS]
50
50
  end
51
51
 
52
52
  # Makes all registered feeds types look for the passed in element to parse.
@@ -279,7 +279,7 @@ module Feedzirra
279
279
 
280
280
  if klass
281
281
  begin
282
- feed = klass.parse(xml, Proc.new{|message| puts "Error while parsing [#{url}] #{message}" })
282
+ feed = klass.parse(xml, Proc.new{|message| warn "Error while parsing [#{url}] #{message}" })
283
283
  feed.feed_url = c.last_effective_url
284
284
  feed.etag = etag_from_header(c.header_str)
285
285
  feed.last_modified = last_modified_from_header(c.header_str)
@@ -332,7 +332,7 @@ module Feedzirra
332
332
  curl.on_success do |c|
333
333
  begin
334
334
  add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
335
- updated_feed = Feed.parse(c.body_str){ |message| puts "Error while parsing [#{feed.feed_url}] #{message}" }
335
+ updated_feed = Feed.parse(c.body_str){ |message| warn "Error while parsing [#{feed.feed_url}] #{message}" }
336
336
  updated_feed.feed_url = c.last_effective_url
337
337
  updated_feed.etag = etag_from_header(c.header_str)
338
338
  updated_feed.last_modified = last_modified_from_header(c.header_str)
@@ -381,4 +381,4 @@ module Feedzirra
381
381
  Time.parse($1) if $1
382
382
  end
383
383
  end
384
- end
384
+ end
@@ -11,7 +11,7 @@ module Feedzirra
11
11
  begin
12
12
  DateTime.parse(string).feed_utils_to_gm_time
13
13
  rescue
14
- puts "DATE CAN'T BE PARSED: [#{string}]"
14
+ warn "Failed to parse date #{string.inspect}"
15
15
  nil
16
16
  end
17
17
  end
@@ -50,23 +50,12 @@ module Feedzirra
50
50
  private
51
51
 
52
52
  def find_new_entries_for(feed)
53
- # this implementation is a hack, which is why it's so ugly.
54
- # it's to get around the fact that not all feeds have a published date.
55
- # however, they're always ordered with the newest one first.
56
- # So we go through the entries just parsed and insert each one as a new entry
57
- # until we get to one that has the same url as the the newest for the feed
58
- return feed.entries if self.entries.length == 0
59
- latest_entry = self.entries.first
60
- found_new_entries = []
61
- feed.entries.each do |entry|
62
- break if entry.url == latest_entry.url
63
- found_new_entries << entry
64
- end
65
- found_new_entries
53
+ # this algorithm does not optimize based on publication date, but always finds new entries
54
+ feed.entries.reject {|entry| self.entries.any? {|e| e.url == entry.url} }
66
55
  end
67
56
 
68
57
  def existing_entry?(test_entry)
69
58
  entries.any? { |entry| entry.url == test_entry.url }
70
59
  end
71
60
  end
72
- end
61
+ end
@@ -26,4 +26,4 @@ module Feedzirra
26
26
  end
27
27
  end
28
28
 
29
- end
29
+ end
@@ -0,0 +1,28 @@
1
+ require File.expand_path('./atom', File.dirname(__FILE__))
2
+
3
+ module Feedzirra
4
+ module Parser
5
+ class GoogleDocsAtom
6
+ include SAXMachine
7
+ include FeedUtilities
8
+ element :title
9
+ element :subtitle, :as => :description
10
+ element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
11
+ element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
12
+ elements :link, :as => :links, :value => :href
13
+ elements :entry, :as => :entries, :class => GoogleDocsAtomEntry
14
+
15
+ def url
16
+ @url ||= links.first
17
+ end
18
+
19
+ def self.able_to_parse?(xml) #:nodoc:
20
+ %r{<id>https?://docs.google.com/.*\</id\>} =~ xml
21
+ end
22
+
23
+ def feed_url
24
+ @feed_url ||= links.first
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,29 @@
1
+ module Feedzirra
2
+ module Parser
3
+ class GoogleDocsAtomEntry
4
+ include SAXMachine
5
+ include FeedEntryUtilities
6
+
7
+ element :title
8
+ element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
9
+ element :name, :as => :author
10
+ element :content
11
+ element :summary
12
+ element :published
13
+ element :id, :as => :entry_id
14
+ element :created, :as => :published
15
+ element :issued, :as => :published
16
+ element :updated
17
+ element :modified, :as => :updated
18
+ elements :category, :as => :categories, :value => :term
19
+ elements :link, :as => :links, :value => :href
20
+ element :"docs:md5Checksum", :as => :checksum
21
+ element :"docs:filename", :as => :original_filename
22
+ element :"docs:suggestedFilename", :as => :suggested_filename
23
+
24
+ def url
25
+ @url ||= links.first
26
+ end
27
+ end
28
+ end
29
+ end
@@ -9,6 +9,9 @@ module Feedzirra
9
9
  autoload :ITunesRSSItem, 'feedzirra/parser/itunes_rss_item'
10
10
  autoload :ITunesRSSOwner, 'feedzirra/parser/itunes_rss_owner'
11
11
 
12
+ autoload :GoogleDocsAtom, 'feedzirra/parser/google_docs_atom'
13
+ autoload :GoogleDocsAtomEntry, 'feedzirra/parser/google_docs_atom_entry'
14
+
12
15
  autoload :Atom, 'feedzirra/parser/atom'
13
16
  autoload :AtomEntry, 'feedzirra/parser/atom_entry'
14
17
  autoload :AtomFeedBurner, 'feedzirra/parser/atom_feed_burner'
@@ -1,3 +1,3 @@
1
1
  module Feedzirra
2
- VERSION = '0.1.2'
2
+ VERSION = '0.1.3'
3
3
  end
@@ -97,6 +97,10 @@ describe Feedzirra::Feed do
97
97
  end
98
98
 
99
99
  describe "#determine_feed_parser_for_xml" do
100
+ it 'should return the Feedzirra::Parser::GoogleDocsAtom calss for a Google Docs atom feed' do
101
+ Feedzirra::Feed.determine_feed_parser_for_xml(sample_google_docs_list_feed).should == Feedzirra::Parser::GoogleDocsAtom
102
+ end
103
+
100
104
  it "should return the Feedzirra::Parser::Atom class for an atom feed" do
101
105
  Feedzirra::Feed.determine_feed_parser_for_xml(sample_atom_feed).should == Feedzirra::Parser::Atom
102
106
  end
@@ -590,4 +594,4 @@ describe Feedzirra::Feed do
590
594
  it "should return an return an array of feed objects if multiple feeds are passed in"
591
595
  end
592
596
  end
593
- end
597
+ end
@@ -0,0 +1,22 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::GoogleDocsAtomEntry do
4
+ describe 'parsing' do
5
+ before do
6
+ @feed = Feedzirra::Parser::GoogleDocsAtom.parse(sample_google_docs_list_feed)
7
+ @entry = @feed.entries.first
8
+ end
9
+
10
+ it 'should have the custom checksum element' do
11
+ @entry.checksum.should eql '2b01142f7481c7b056c4b410d28f33cf'
12
+ end
13
+
14
+ it 'should have the custom filename element' do
15
+ @entry.original_filename.should eql "MyFile.pdf"
16
+ end
17
+
18
+ it 'should have the custom suggested filename element' do
19
+ @entry.suggested_filename.should eql "TaxDocument.pdf"
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,31 @@
1
+ require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
+
3
+ describe Feedzirra::Parser::GoogleDocsAtom do
4
+ describe '.able_to_parser?' do
5
+ it 'should return true for Google Docs feed' do
6
+ Feedzirra::Parser::GoogleDocsAtom.should be_able_to_parse(sample_google_docs_list_feed)
7
+ end
8
+
9
+ it 'should not be able to parse another Atom feed' do
10
+ Feedzirra::Parser::GoogleDocsAtom.should_not be_able_to_parse(sample_atom_feed)
11
+ end
12
+ end
13
+
14
+ describe 'parsing' do
15
+ before do
16
+ @feed = Feedzirra::Parser::GoogleDocsAtom.parse(sample_google_docs_list_feed)
17
+ end
18
+
19
+ it 'should return a bunch of objects' do
20
+ @feed.entries.should_not be_empty
21
+ end
22
+
23
+ it 'should populate a title, interhited from the Atom entry' do
24
+ @feed.title.should_not be_nil
25
+ end
26
+
27
+ it 'should return a bunch of entries of type GoogleDocsAtomEntry' do
28
+ @feed.entries.first.should be_a Feedzirra::Parser::GoogleDocsAtomEntry
29
+ end
30
+ end
31
+ end
data/spec/spec_helper.rb CHANGED
@@ -64,6 +64,10 @@ end
64
64
  def sample_wfw_feed
65
65
  load_sample("PaulDixExplainsNothingWFW.xml")
66
66
  end
67
+
68
+ def sample_google_docs_list_feed
69
+ load_sample("GoogleDocsList.xml")
70
+ end
67
71
 
68
72
  # http://eigenclass.org/hiki/Changes+in+Ruby+1.9#l156
69
73
  # Default Time.to_s changed in 1.9, monkeypatching it back
@@ -71,4 +75,4 @@ class Time
71
75
  def to_s
72
76
  strftime("%a %b %d %H:%M:%S %Z %Y")
73
77
  end
74
- end
78
+ end
metadata CHANGED
@@ -1,181 +1,134 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: feedzirra
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 1
8
- - 2
9
- version: 0.1.2
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.3
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Paul Dix
13
9
  - Julien Kirch
14
10
  autorequire:
15
11
  bindir: bin
16
12
  cert_chain: []
17
-
18
- date: 2011-09-30 00:00:00 -04:00
19
- default_executable:
20
- dependencies:
21
- - !ruby/object:Gem::Dependency
13
+ date: 2011-09-30 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
22
16
  name: nokogiri
23
- prerelease: false
24
- requirement: &id001 !ruby/object:Gem::Requirement
17
+ requirement: &70245154994900 !ruby/object:Gem::Requirement
25
18
  none: false
26
- requirements:
27
- - - ">="
28
- - !ruby/object:Gem::Version
29
- segments:
30
- - 1
31
- - 4
32
- - 4
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
33
22
  version: 1.4.4
34
23
  type: :runtime
35
- version_requirements: *id001
36
- - !ruby/object:Gem::Dependency
37
- name: sax-machine
38
24
  prerelease: false
39
- requirement: &id002 !ruby/object:Gem::Requirement
25
+ version_requirements: *70245154994900
26
+ - !ruby/object:Gem::Dependency
27
+ name: sax-machine
28
+ requirement: &70245154994260 !ruby/object:Gem::Requirement
40
29
  none: false
41
- requirements:
30
+ requirements:
42
31
  - - ~>
43
- - !ruby/object:Gem::Version
44
- segments:
45
- - 0
46
- - 1
47
- - 0
32
+ - !ruby/object:Gem::Version
48
33
  version: 0.1.0
49
34
  type: :runtime
50
- version_requirements: *id002
51
- - !ruby/object:Gem::Dependency
52
- name: curb
53
35
  prerelease: false
54
- requirement: &id003 !ruby/object:Gem::Requirement
36
+ version_requirements: *70245154994260
37
+ - !ruby/object:Gem::Dependency
38
+ name: curb
39
+ requirement: &70245154993700 !ruby/object:Gem::Requirement
55
40
  none: false
56
- requirements:
41
+ requirements:
57
42
  - - ~>
58
- - !ruby/object:Gem::Version
59
- segments:
60
- - 0
61
- - 7
62
- - 15
43
+ - !ruby/object:Gem::Version
63
44
  version: 0.7.15
64
45
  type: :runtime
65
- version_requirements: *id003
66
- - !ruby/object:Gem::Dependency
67
- name: builder
68
46
  prerelease: false
69
- requirement: &id004 !ruby/object:Gem::Requirement
47
+ version_requirements: *70245154993700
48
+ - !ruby/object:Gem::Dependency
49
+ name: builder
50
+ requirement: &70245154993160 !ruby/object:Gem::Requirement
70
51
  none: false
71
- requirements:
72
- - - ">="
73
- - !ruby/object:Gem::Version
74
- segments:
75
- - 2
76
- - 1
77
- - 2
52
+ requirements:
53
+ - - ! '>='
54
+ - !ruby/object:Gem::Version
78
55
  version: 2.1.2
79
56
  type: :runtime
80
- version_requirements: *id004
81
- - !ruby/object:Gem::Dependency
82
- name: activesupport
83
57
  prerelease: false
84
- requirement: &id005 !ruby/object:Gem::Requirement
58
+ version_requirements: *70245154993160
59
+ - !ruby/object:Gem::Dependency
60
+ name: activesupport
61
+ requirement: &70245154845640 !ruby/object:Gem::Requirement
85
62
  none: false
86
- requirements:
63
+ requirements:
87
64
  - - ~>
88
- - !ruby/object:Gem::Version
89
- segments:
90
- - 3
91
- - 0
92
- - 8
93
- version: 3.0.8
65
+ - !ruby/object:Gem::Version
66
+ version: 3.1.1
94
67
  type: :runtime
95
- version_requirements: *id005
96
- - !ruby/object:Gem::Dependency
97
- name: loofah
98
68
  prerelease: false
99
- requirement: &id006 !ruby/object:Gem::Requirement
69
+ version_requirements: *70245154845640
70
+ - !ruby/object:Gem::Dependency
71
+ name: loofah
72
+ requirement: &70245154844980 !ruby/object:Gem::Requirement
100
73
  none: false
101
- requirements:
74
+ requirements:
102
75
  - - ~>
103
- - !ruby/object:Gem::Version
104
- segments:
105
- - 1
106
- - 2
107
- - 0
76
+ - !ruby/object:Gem::Version
108
77
  version: 1.2.0
109
78
  type: :runtime
110
- version_requirements: *id006
111
- - !ruby/object:Gem::Dependency
112
- name: rdoc
113
79
  prerelease: false
114
- requirement: &id007 !ruby/object:Gem::Requirement
80
+ version_requirements: *70245154844980
81
+ - !ruby/object:Gem::Dependency
82
+ name: rdoc
83
+ requirement: &70245154844420 !ruby/object:Gem::Requirement
115
84
  none: false
116
- requirements:
85
+ requirements:
117
86
  - - ~>
118
- - !ruby/object:Gem::Version
119
- segments:
120
- - 3
121
- - 8
122
- version: "3.8"
87
+ - !ruby/object:Gem::Version
88
+ version: '3.8'
123
89
  type: :runtime
124
- version_requirements: *id007
125
- - !ruby/object:Gem::Dependency
126
- name: rake
127
90
  prerelease: false
128
- requirement: &id008 !ruby/object:Gem::Requirement
91
+ version_requirements: *70245154844420
92
+ - !ruby/object:Gem::Dependency
93
+ name: rake
94
+ requirement: &70245154843940 !ruby/object:Gem::Requirement
129
95
  none: false
130
- requirements:
131
- - - ">="
132
- - !ruby/object:Gem::Version
133
- segments:
134
- - 0
135
- - 8
136
- - 7
96
+ requirements:
97
+ - - ! '>='
98
+ - !ruby/object:Gem::Version
137
99
  version: 0.8.7
138
100
  type: :runtime
139
- version_requirements: *id008
140
- - !ruby/object:Gem::Dependency
141
- name: i18n
142
101
  prerelease: false
143
- requirement: &id009 !ruby/object:Gem::Requirement
102
+ version_requirements: *70245154843940
103
+ - !ruby/object:Gem::Dependency
104
+ name: i18n
105
+ requirement: &70245154843440 !ruby/object:Gem::Requirement
144
106
  none: false
145
- requirements:
146
- - - ">="
147
- - !ruby/object:Gem::Version
148
- segments:
149
- - 0
150
- - 5
151
- - 0
107
+ requirements:
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
152
110
  version: 0.5.0
153
111
  type: :runtime
154
- version_requirements: *id009
155
- - !ruby/object:Gem::Dependency
156
- name: rspec
157
112
  prerelease: false
158
- requirement: &id010 !ruby/object:Gem::Requirement
113
+ version_requirements: *70245154843440
114
+ - !ruby/object:Gem::Dependency
115
+ name: rspec
116
+ requirement: &70245154842920 !ruby/object:Gem::Requirement
159
117
  none: false
160
- requirements:
118
+ requirements:
161
119
  - - ~>
162
- - !ruby/object:Gem::Version
163
- segments:
164
- - 2
165
- - 6
166
- - 0
120
+ - !ruby/object:Gem::Version
167
121
  version: 2.6.0
168
122
  type: :development
169
- version_requirements: *id010
170
- description: "A feed fetching and parsing library that treats the internet like Godzilla treats Japan: it dominates and eats all."
123
+ prerelease: false
124
+ version_requirements: *70245154842920
125
+ description: ! 'A feed fetching and parsing library that treats the internet like
126
+ Godzilla treats Japan: it dominates and eats all.'
171
127
  email: feedzirra@googlegroups.com
172
128
  executables: []
173
-
174
129
  extensions: []
175
-
176
130
  extra_rdoc_files: []
177
-
178
- files:
131
+ files:
179
132
  - lib/feedzirra/core_ext/date.rb
180
133
  - lib/feedzirra/core_ext/string.rb
181
134
  - lib/feedzirra/core_ext.rb
@@ -186,6 +139,8 @@ files:
186
139
  - lib/feedzirra/parser/atom_entry.rb
187
140
  - lib/feedzirra/parser/atom_feed_burner.rb
188
141
  - lib/feedzirra/parser/atom_feed_burner_entry.rb
142
+ - lib/feedzirra/parser/google_docs_atom.rb
143
+ - lib/feedzirra/parser/google_docs_atom_entry.rb
189
144
  - lib/feedzirra/parser/itunes_rss.rb
190
145
  - lib/feedzirra/parser/itunes_rss_item.rb
191
146
  - lib/feedzirra/parser/itunes_rss_owner.rb
@@ -208,6 +163,8 @@ files:
208
163
  - spec/feedzirra/parser/atom_feed_burner_entry_spec.rb
209
164
  - spec/feedzirra/parser/atom_feed_burner_spec.rb
210
165
  - spec/feedzirra/parser/atom_spec.rb
166
+ - spec/feedzirra/parser/google_docs_atom_entry_spec.rb
167
+ - spec/feedzirra/parser/google_docs_atom_spec.rb
211
168
  - spec/feedzirra/parser/itunes_rss_item_spec.rb
212
169
  - spec/feedzirra/parser/itunes_rss_owner_spec.rb
213
170
  - spec/feedzirra/parser/itunes_rss_spec.rb
@@ -220,39 +177,31 @@ files:
220
177
  - README.rdoc
221
178
  - Rakefile
222
179
  - .rspec
223
- has_rdoc: true
224
180
  homepage: http://github.com/pauldix/feedzirra
225
181
  licenses: []
226
-
227
182
  post_install_message:
228
183
  rdoc_options: []
229
-
230
- require_paths:
184
+ require_paths:
231
185
  - lib
232
- required_ruby_version: !ruby/object:Gem::Requirement
186
+ required_ruby_version: !ruby/object:Gem::Requirement
233
187
  none: false
234
- requirements:
235
- - - ">="
236
- - !ruby/object:Gem::Version
237
- segments:
238
- - 0
239
- version: "0"
240
- required_rubygems_version: !ruby/object:Gem::Requirement
188
+ requirements:
189
+ - - ! '>='
190
+ - !ruby/object:Gem::Version
191
+ version: '0'
192
+ required_rubygems_version: !ruby/object:Gem::Requirement
241
193
  none: false
242
- requirements:
243
- - - ">="
244
- - !ruby/object:Gem::Version
245
- segments:
246
- - 0
247
- version: "0"
194
+ requirements:
195
+ - - ! '>='
196
+ - !ruby/object:Gem::Version
197
+ version: '0'
248
198
  requirements: []
249
-
250
199
  rubyforge_project:
251
- rubygems_version: 1.3.7
200
+ rubygems_version: 1.8.17
252
201
  signing_key:
253
202
  specification_version: 3
254
203
  summary: A feed fetching and parsing library
255
- test_files:
204
+ test_files:
256
205
  - spec/benchmarks/feed_benchmarks.rb
257
206
  - spec/benchmarks/feedzirra_benchmarks.rb
258
207
  - spec/benchmarks/fetching_benchmarks.rb
@@ -265,6 +214,8 @@ test_files:
265
214
  - spec/feedzirra/parser/atom_feed_burner_entry_spec.rb
266
215
  - spec/feedzirra/parser/atom_feed_burner_spec.rb
267
216
  - spec/feedzirra/parser/atom_spec.rb
217
+ - spec/feedzirra/parser/google_docs_atom_entry_spec.rb
218
+ - spec/feedzirra/parser/google_docs_atom_spec.rb
268
219
  - spec/feedzirra/parser/itunes_rss_item_spec.rb
269
220
  - spec/feedzirra/parser/itunes_rss_owner_spec.rb
270
221
  - spec/feedzirra/parser/itunes_rss_spec.rb